[llvm] [X86][AVX512] Add pseudos for `AVX512_*_SETALLONES` (PR #169009)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 23:08:07 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Abhishek Kaushik (abhishek-kaushik22)
<details>
<summary>Changes</summary>
Introduce the `AVX512_128_SETALLONES` and `AVX512_256_SETALLONES` pseudos to generate all-ones vectors.
Post-RA expansion:
- Use VEX `vpcmpeqd` when the destination register is XMM/YMM0–15 (this matches current codegen, since `AVX512_128/256_SETALLONES` will be preferred over `AVX1/2_SETALLONES` on AVX512VL targets).
- Use EVEX `vpternlogd` with `imm=0xFF` for the high registers (XMM/YMM16–31).
Includes MIR tests for both VEX and EVEX paths.
---
Full diff: https://github.com/llvm/llvm-project/pull/169009.diff
4 Files Affected:
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+6)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+31-1)
- (added) llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir (+30)
- (modified) llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll (+2-1)
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1b748b7355716..efb9f1309a528 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -300,6 +300,12 @@ def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
+let AddedComplexity = 1, Predicates = [HasVLX] in {
+ def AVX512_128_SETALLONES : I<0, Pseudo, (outs VR128X:$dst), (ins),
+ "", [(set VR128X:$dst, (v4i32 immAllOnesV))]>;
+ def AVX512_256_SETALLONES : I<0, Pseudo, (outs VR256X:$dst), (ins),
+ "", [(set VR256X:$dst, (v8i32 immAllOnesV))]>;
+}
}
let Predicates = [HasAVX512] in {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5c23f917d0530..3136ad36ca5cb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -779,6 +779,8 @@ bool X86InstrInfo::isReMaterializableImpl(
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
+ case X86::AVX512_128_SETALLONES:
+ case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES:
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0SH:
@@ -6253,9 +6255,31 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
return true;
}
+ case X86::AVX512_128_SETALLONES:
+ case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES: {
Register Reg = MIB.getReg(0);
- MIB->setDesc(get(X86::VPTERNLOGDZrri));
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ case X86::AVX512_128_SETALLONES: {
+ if (X86::VR128RegClass.contains(Reg))
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDrr));
+
+ Opc = X86::VPTERNLOGDZ128rri;
+ break;
+ }
+ case X86::AVX512_256_SETALLONES: {
+ if (X86::VR256RegClass.contains(Reg))
+ return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
+
+ Opc = X86::VPTERNLOGDZ256rri;
+ break;
+ }
+ case X86::AVX512_512_SETALLONES:
+ Opc = X86::VPTERNLOGDZrri;
+ break;
+ }
+ MIB->setDesc(get(Opc));
// VPTERNLOGD needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
MIB.addReg(Reg, RegState::Undef)
@@ -8194,6 +8218,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX1_SETALLONES:
case X86::AVX_SET0:
case X86::AVX512_256_SET0:
+ case X86::AVX512_256_SETALLONES:
Alignment = Align(32);
break;
case X86::V_SET0:
@@ -8201,6 +8226,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_128_SET0:
case X86::FsFLD0F128:
case X86::AVX512_FsFLD0F128:
+ case X86::AVX512_128_SETALLONES:
Alignment = Align(16);
break;
case X86::MMX_SET0:
@@ -8259,6 +8285,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_128_SET0:
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
+ case X86::AVX512_128_SETALLONES:
+ case X86::AVX512_256_SETALLONES:
case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SH:
case X86::AVX512_FsFLD0SH:
@@ -8319,6 +8347,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
break;
case X86::AVX1_SETALLONES:
case X86::AVX2_SETALLONES:
+ case X86::AVX512_256_SETALLONES:
IsAllOnes = true;
[[fallthrough]];
case X86::AVX512_256_SET0:
@@ -8332,6 +8361,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
2);
break;
case X86::V_SETALLONES:
+ case X86::AVX512_128_SETALLONES:
IsAllOnes = true;
[[fallthrough]];
case X86::V_SET0:
diff --git a/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir b/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir
new file mode 100644
index 0000000000000..7e5ddc4cd632f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-setallones-pseudo.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+# RUN: llc %s -mtriple=x86_64-- -start-before=postrapseudos -o - | FileCheck %s
+
+--- |
+ target triple = "x86_64-unknown-unknown"
+
+ define void @setallones() #0 {
+ ; CHECK-LABEL: setallones:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vpcmpeqd %xmm14, %xmm14, %xmm14
+ ; CHECK-NEXT: vpternlogd {{.*#+}} xmm16 = -1
+ ; CHECK-NEXT: vpcmpeqd %ymm15, %ymm15, %ymm15
+ ; CHECK-NEXT: vpternlogd {{.*#+}} ymm17 = -1
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "target-features"="+avx512f,+avx512vl" }
+---
+name: setallones
+tracksRegLiveness: true
+liveins: []
+body: |
+ bb.0:
+ $xmm14 = AVX512_128_SETALLONES
+ $xmm16 = AVX512_128_SETALLONES
+ $ymm15 = AVX512_256_SETALLONES
+ $ymm17 = AVX512_256_SETALLONES
+
+...
diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
index 3243d950740ca..e2400fbe2c4ff 100644
--- a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
+++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
@@ -106,7 +106,8 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
; AVX512: # %bb.0:
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: .cfi_def_cfa_offset 32
-; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT: callq use.v4.i32@PLT
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
``````````
</details>
https://github.com/llvm/llvm-project/pull/169009
More information about the llvm-commits
mailing list