[llvm] [X86] Handle X86ISD::EXPAND/COMPRESS nodes as target shuffles (PR #171119)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 04:19:08 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Treating these nodes as target shuffles allows for further shuffle simplification.
This required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
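As a rough illustration of the mask construction this patch performs inside getTargetShuffleMask (a minimal standalone sketch only; the helper names `compressMask`/`expandMask` are hypothetical and the real code works on APInt bits returned by getTargetConstantBitsFromNode):

```cpp
// Sketch of how a COMPRESS/EXPAND predicate maps to a 2-input shuffle mask.
// Indices < NumElems select the source vector; indices >= NumElems select
// the pass-through operand.
#include <cstdio>
#include <vector>

// COMPRESS: selected source lanes are packed into the low positions; the
// remaining positions keep the corresponding pass-through lanes.
std::vector<int> compressMask(const std::vector<bool> &K) {
  unsigned NumElems = K.size();
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumElems; ++I)
    if (K[I])
      Mask.push_back(I);
  while (Mask.size() != NumElems)
    Mask.push_back(NumElems + Mask.size());
  return Mask;
}

// EXPAND: consecutive source lanes are scattered to the selected positions;
// unselected positions come from the pass-through operand.
std::vector<int> expandMask(const std::vector<bool> &K) {
  unsigned NumElems = K.size(), ExpIdx = 0;
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumElems; ++I)
    Mask.push_back(K[I] ? int(ExpIdx++) : int(I + NumElems));
  return Mask;
}

int main() {
  // compress predicate {1,0,1,0} -> shuffle mask {0,2,6,7}
  for (int M : compressMask({true, false, true, false}))
    printf("%d ", M);
  printf("\n");
  // expand predicate {0,1,1,1} -> shuffle mask {4,0,1,2}
  for (int M : expandMask({false, true, true, true}))
    printf("%d ", M);
  printf("\n");
}
```

Once expressed as a generic 2-input shuffle mask, the existing shuffle combines can fold these nodes; the updated tests below show a single-lane expand becoming a VALIGND against a zero vector and a low-half compress becoming a plain YMM move.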
---
Full diff: https://github.com/llvm/llvm-project/pull/171119.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+46)
- (modified) llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll (+11-56)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d0ae75b2e6785..b71878ae1434c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2888,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
return true;
}
}
@@ -5839,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
}
return false;
}
+ case X86ISD::COMPRESS: {
+ SDValue CmpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue CmpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal compression mask");
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (!EltBits[I].isZero())
+ Mask.push_back(I);
+ }
+ while (Mask.size() != NumElems) {
+ Mask.push_back(NumElems + Mask.size());
+ }
+ Ops.push_back(CmpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
+ case X86ISD::EXPAND: {
+ SDValue ExpVec = N.getOperand(0);
+ SDValue PassThru = N.getOperand(1);
+ SDValue ExpMask = N.getOperand(2);
+ APInt UndefElts;
+ SmallVector<APInt> EltBits;
+ if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+ return false;
+ assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+ "Illegal expansion mask");
+ unsigned ExpIndex = 0;
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (EltBits[I].isZero())
+ Mask.push_back(I + NumElems);
+ else
+ Mask.push_back(ExpIndex++);
+ }
+ Ops.push_back(ExpVec);
+ Ops.push_back(PassThru);
+ return true;
+ }
default:
llvm_unreachable("unknown target shuffle node");
}
@@ -61325,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case X86ISD::SHUF128:
case X86ISD::VZEXT_MOVL:
+ case X86ISD::COMPRESS:
+ case X86ISD::EXPAND:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
case X86ISD::FMADD_RND:
case X86ISD::FMSUB:
diff --git a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
index 47331db7261b3..b19112c02c085 100644
--- a/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
+++ b/llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK
-define void @test_compress_undef_float_passthrough() {
+define void @test_compress_undef_float_passthrough(<4 x double> %a0) {
; CHECK-LABEL: test_compress_undef_float_passthrough:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movb $5, %al
@@ -12,7 +12,7 @@ define void @test_compress_undef_float_passthrough() {
; CHECK-NEXT: retq
entry: ; preds = %loop.50
%0 = bitcast i4 undef to <4 x i1>
- %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
+ %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %a0, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %1, <4 x ptr> undef, i32 0, <4 x i1> %0)
ret void
}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index 3ea95eeaedfc7..b79d9e8ce47e9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -1035,68 +1035,23 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
ret <8 x double> %res
}
-; TODO - shift elements up by one
+; shift elements up by one
define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $-2, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $-2, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $-2, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $-2, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vexpandd_as_valignd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: valignd {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
ret <16 x i32> %res
}
-; TODO - zero upper half of vector
+; zero upper half of vector
define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $255, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $255, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $255, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $255, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vcompressd_as_vmov:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
ret <16 x i32> %res
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/171119