[llvm] 6984cfe - [X86] Ensure concat(blendi(), blendi()) -> vselect() uses legal select mask types
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 02:14:22 PDT 2025
Author: Simon Pilgrim
Date: 2025-03-25T09:14:08Z
New Revision: 6984cfea6c888965bb45d046abbb97ddb0443955
URL: https://github.com/llvm/llvm-project/commit/6984cfea6c888965bb45d046abbb97ddb0443955
DIFF: https://github.com/llvm/llvm-project/commit/6984cfea6c888965bb45d046abbb97ddb0443955.diff
LOG: [X86] Ensure concat(blendi(),blendi()) -> vselect() uses legal select mask types
For 256-bit selections, we could be using selection condition masks narrower than i8/v8i1 - extend these to i8 and then extract the lowest mask subvector.
Fixes #132844
Added:
llvm/test/CodeGen/X86/pr132844.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2b5f9e9951613..78b9b736c478c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58617,10 +58617,13 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
APInt Mask = getBLENDIBlendMask(Ops[0]).zext(NumElts);
for (unsigned I = 1; I != NumOps; ++I)
Mask.insertBits(getBLENDIBlendMask(Ops[I]), I * (NumElts / NumOps));
- MVT MaskSVT = MVT::getIntegerVT(NumElts);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ unsigned NumMaskBits = NumElts >= 8 ? NumElts : 8;
+ Mask = Mask.zextOrTrunc(NumMaskBits);
+ MVT MaskSVT = MVT::getIntegerVT(NumMaskBits);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumMaskBits);
SDValue Sel =
DAG.getBitcast(MaskVT, DAG.getConstant(Mask, DL, MaskSVT));
+ Sel = extractSubVector(Sel, 0, DAG, DL, NumElts);
Concat0 = Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0);
Concat1 = Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1);
return DAG.getSelect(DL, VT, Sel, Concat1, Concat0);
diff --git a/llvm/test/CodeGen/X86/pr132844.ll b/llvm/test/CodeGen/X86/pr132844.ll
new file mode 100644
index 0000000000000..ded100b2accce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr132844.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define { ptr, i8 } @PR132844(<4 x ptr> %0, <4 x ptr> %1) {
+; CHECK-LABEL: PR132844:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: movb $10, %al
+; CHECK-NEXT: kmovd %eax, %k1
+; CHECK-NEXT: vinserti64x2 $1, 16, %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqu %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %3 = alloca [35 x ptr], i32 0, align 16
+ %4 = load <4 x ptr>, ptr null, align 8
+ %5 = getelementptr i8, ptr %3, i64 216
+ %6 = extractelement <4 x ptr> %4, i64 3
+ store ptr %6, ptr %5, align 8
+ %7 = getelementptr i8, ptr %3, i64 208
+ %8 = extractelement <4 x ptr> %0, i64 0
+ store ptr %8, ptr %7, align 8
+ %9 = getelementptr i8, ptr %3, i64 200
+ %10 = extractelement <4 x ptr> %0, i64 3
+ store ptr %10, ptr %9, align 8
+ %11 = getelementptr i8, ptr %3, i64 192
+ %12 = extractelement <4 x ptr> %1, i64 0
+ store ptr %12, ptr %11, align 8
+ ret { ptr, i8 } zeroinitializer
+}
More information about the llvm-commits mailing list