[llvm-branch-commits] [AArch64][GlobalISel] Add support for lowering trunc stores of vector bools. (PR #121169)
via llvm-branch-commits
llvm-branch-commits@lists.llvm.org
Thu Dec 26 16:12:44 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Amara Emerson (aemerson)
<details>
<summary>Changes</summary>
This is essentially a port of TargetLowering::scalarizeVectorStore(), which
is used for the case where we have something like a store of <8 x s8> truncating
to <8 x s1> in memory. The naive lowering is a sequence of extracts to compute
a scalar value to store.
AArch64's DAG implementation has some more smarts to improve this further which
we can do later.
---
Patch is 62.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121169.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+33-2)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir (+54-5)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir (+61-7)
- (modified) llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll (+928-300)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6e87acef6ca7d3..7ffd00bf4cd689 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4142,9 +4142,40 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
}
if (MemTy.isVector()) {
- // TODO: Handle vector trunc stores
- if (MemTy != SrcTy)
+ LLT MemScalarTy = MemTy.getElementType();
+ if (MemTy != SrcTy) {
+ if (!MemScalarTy.isByteSized()) {
+ // We need to build an integer scalar of the vector bit pattern.
+ // It's not legal for us to add padding when storing a vector.
+ unsigned NumBits = MemTy.getSizeInBits();
+ LLT IntTy = LLT::scalar(NumBits);
+ auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
+ LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
+
+ for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
+ auto Elt = MIRBuilder.buildExtractVectorElement(
+ SrcTy.getElementType(), SrcReg,
+ MIRBuilder.buildConstant(IdxTy, I));
+ auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
+ auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
+ unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
+ ? (MemTy.getNumElements() - 1) - I
+ : I;
+ auto ShiftAmt = MIRBuilder.buildConstant(
+ IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
+ auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
+ CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
+ }
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
+ MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ // FIXME: implement simple scalarization.
return UnableToLegalize;
+ }
// TODO: We can do better than scalarizing the vector and at least split it
// in half.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 16b0587e799c7b..062e7ace5e724d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -474,7 +474,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.customIf(IsPtrVecPred)
.scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
- .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .lower();
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
index be5699dab5b6de..e8e1cd351c56e3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
---
name: scalar_to_oversize_vector
tracksRegLiveness: true
@@ -48,17 +48,66 @@ body: |
G_BR %bb.2
...
-# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
---
name: boolean_vector_to_scalar
tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: boolean_vector_to_scalar
- ; CHECK: %vec:_(<8 x s1>) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT %vec(<8 x s1>)
- ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), [[FRAME_INDEX]](p0) :: (store (<8 x s1>) into %stack.0)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C]](s64)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C3]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C4]](s64)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C5]](s64)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C6]](s64)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C7]](s64)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C8]](s64)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C9]](s64)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C1]]
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[FRAME_INDEX]](p0) :: (store (s8) into %stack.0)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s8) from %stack.0)
; CHECK-NEXT: %bc:_(s8) = COPY [[LOAD]](s8)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %bc(s8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
index de70f89461780b..1df6297e363833 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
-# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
---
name: store_8xs1
tracksRegLiveness: true
@@ -13,12 +12,67 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(slt), [[CONCAT_VECTORS]](<8 x s32>), [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT [[ICMP]](<8 x s1>)
- ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), %ptr(p0) :: (store (<8 x s1>))
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY1]](<4 x s32>), [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C1]](s64)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C3]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C4]](s64)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C5]](s64)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C6]](s64)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C7]](s64)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C8]](s64)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C9]](s64)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]]
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC3]](s8), %ptr(p0) :: (store (s8))
; CHECK-NEXT: RET_ReallyLR
%1:_(<4 x s32>) = COPY $q0
%2:_(<4 x s32>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 7f2eefe5ed72f6..496f7ebf300e50 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -1,26 +1,100 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG
+; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL
; Basic tests from input vector to bitmask
; IR generated from clang for:
; __builtin_convertvector + reinterpret_cast<uint16&>
+; GISEL: warning: Instruction selection used fallback path for convert_to_bitmask4
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask2
+; GISEL-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_no_compare
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_compare_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_trunc_in_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_unknown_type_in_long_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_different_types_in_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_4xi8
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_float
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_legalized_illegal_element_size
+; GISEL-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat
+; GISEL-NEXT: warning: Instruction selection used fallback path for no_combine_illegal_num_elements
+
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
; Bits used in mask
-; CHECK-LABEL: convert_to_bitmask16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
-; CHECK-NEXT: cmeq.16b v0, v0, #0
-; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF]
-; CHECK-NEXT: bic.16b v0, v1, v0
-; CHECK-NEXT: ext.16b v1, v0, v0, #8
-; CHECK-NEXT: zip1.16b v0, v0, v1
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
+; SDAG-LABEL: convert_to_bitmask16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: Lloh0:
+; SDAG-NEXT: adrp x8, lCPI0_0@PAGE
+; SDAG-NEXT: cmeq.16b v0, v0, #0
+; SDAG-NEXT: Lloh1:
+; SDAG-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF]
+; SDAG-NEXT: bic.16b v0, v1, v0
+; SDAG-NEXT: ext.16b v1, v0, v0, #8
+; SDAG-NEXT: zip1.16b v0, v0, v1
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+; SDAG-NEXT: .loh AdrpLdr Lloh0, Lloh1
+;
+; GISEL-LABEL: convert_to_bitmask16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: cmeq.16b v0, v0, #0
+; GISEL-NEXT: mvn.16b v0, v0
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w10, v0[2]
+; GISEL-NEXT: umov.b w11, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: orr w8, w9, w8, lsl #2
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[5]
+; GISEL-NEXT: orr w8, w8, w9, lsl #3
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[6]
+; GISEL-NEXT: orr w8, w8, w9, lsl #4
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: orr w8, w8, w9, lsl #5
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[8]
+; GISEL-NEXT: orr w8, w8, w9, lsl #6
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[9]
+; GISEL-NEXT: orr w8, w8, w9, lsl #7
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[10]
+; GISEL-NEXT: orr w8, w8, w9, lsl #8
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[11]
+; GISEL-NEXT: orr w8, w8, w9, lsl #9
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[12]
+; GISEL-NEXT: orr w8, w8, w9, lsl #10
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[13]
+; GISEL-NEXT: orr w8, w8, w9, lsl #11
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[14]
+; GISEL-NEXT: orr w8, w8, w9, lsl #12
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[15]
+; GISEL-NEXT: orr w8, w8, w9, lsl #13
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #14
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #15
+; GISEL-NEXT: strh w8, [sp, #14]
+; GISEL-NEXT: and w0, w8, #0xffff
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
; Actual conversion
@@ -30,19 +104,53 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) {
}
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
-; CHECK-LABEL: convert_to_bitmask8:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x8, lCPI1_0@PAGE
-; CHECK-NEXT: cme...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121169
More information about the llvm-branch-commits
mailing list