[llvm] Fixing upper lane return source for X86 intrinsics (PR #185329)
Aaron Smull via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 17:19:10 PDT 2026
https://github.com/asmull updated https://github.com/llvm/llvm-project/pull/185329
>From 5e491a4450764e7757581fbd8bc275d0d8f219b7 Mon Sep 17 00:00:00 2001
From: Aaron Smull <a.p.smull at gmail.com>
Date: Sun, 8 Mar 2026 17:01:59 -0700
Subject: [PATCH] Fixing upper lane return source for X86 intrinsics
---
llvm/lib/Target/X86/X86InstrAVX10.td | 24 ++++-----
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 15 +++---
.../CodeGen/X86/avx10_2minmax-intrinsics.ll | 54 +++++++++++++++++++
3 files changed, 75 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 4b3ddbdedbdee..44b083cd4f7c3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -119,30 +119,30 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
(i32 timm:$src3)))]>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
- defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ defm rri : AVX512_maskable_scalar<0x53, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX]>;
- defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ defm rmi : AVX512_maskable_scalar<0x53, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
- defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ defm rrib : AVX512_maskable_scalar<0x53, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX]>, EVEX_B;
}
}
@@ -164,11 +164,11 @@ defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>
avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;
-defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", f64x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
-defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", f16x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
-defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", f32x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
//-------------------------------------------------
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index c0c98c1f35491..7c83275e8bad8 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -611,12 +611,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VMINMAX, 0),
X86_INTRINSIC_DATA(avx10_mask_vminmaxps256, INTR_TYPE_3OP_MASK_SAE,
X86ISD::VMINMAX, 0),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxsd_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxsh_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxss_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxsd_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxsh_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxss_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
X86_INTRINSIC_DATA(avx10_vcomisbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(avx10_vcomisbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(avx10_vcomisbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
diff --git a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
index 8ae5b670764e2..bded927bc3aa1 100644
--- a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
@@ -561,6 +561,24 @@ define <2 x double>@test_int_x86_maskz_vminmaxsd_round(<2 x double> %A, <2 x dou
ret <2 x double> %ret
}
+define <2 x double> @test_upper_lane_passthrough_vminmaxsd_round(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxsd_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxsd_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1]
+; X86-NEXT: retl # encoding: [0xc3]
+ %ret = call <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(<2 x double> %A, <2 x double> %B, i32 127, <2 x double> %C,
+ i8 0, ; all-zero mask -- lane 0 of the result takes the passthrough operand; the upper lanes come from the first source operand
+ i32 8)
+ ret <2 x double> %ret
+}
+
declare <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(<2 x double> %A, <2 x double> %B, i32 %C, <2 x double> %D, i8 %E, i32 %F)
define <8 x half>@test_int_x86_vminmaxsh(<8 x half> %A, <8 x half> %B) nounwind {
@@ -659,6 +677,24 @@ define <8 x half>@test_int_x86_maskz_vminmaxsh_round(<8 x half> %A, <8 x half> %
ret <8 x half> %ret
}
+define <8 x half> @test_upper_lane_passthrough_vminmaxsh_round(<8 x half> %A, <8 x half> %B, <8 x half> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxsh_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh %xmm2, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxsh_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh %xmm2, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
+; X86-NEXT: retl # encoding: [0xc3]
+ %ret = call <8 x half> @llvm.x86.avx10.mask.vminmaxsh.round(<8 x half> %A, <8 x half> %B, i32 127, <8 x half> %C,
+ i8 0, ; all-zero mask -- lane 0 of the result takes the passthrough operand; the upper lanes come from the first source operand
+ i32 8)
+ ret <8 x half> %ret
+}
+
declare <8 x half> @llvm.x86.avx10.mask.vminmaxsh.round(<8 x half> %A, <8 x half> %B, i32 %C, <8 x half> %D, i8 %E, i32 %F)
define <4 x float>@test_int_x86_vminmaxss(<4 x float> %A, <4 x float> %B) nounwind {
@@ -757,4 +793,22 @@ define <4 x float>@test_int_x86_maskz_vminmaxss_round(<4 x float> %A, <4 x float
ret <4 x float> %ret
}
+define <4 x float> @test_upper_lane_passthrough_vminmaxss_round(<4 x float> %A, <4 x float> %B, <4 x float> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxss_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovss %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxss_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovss %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3]
+; X86-NEXT: retl # encoding: [0xc3]
+ %ret = call <4 x float> @llvm.x86.avx10.mask.vminmaxss.round(<4 x float> %A, <4 x float> %B, i32 0, <4 x float> %C,
+ i8 0, ; all-zero mask -- lane 0 of the result takes the passthrough operand; the upper lanes come from the first source operand
+ i32 8)
+ ret <4 x float> %ret
+}
+
declare <4 x float> @llvm.x86.avx10.mask.vminmaxss.round(<4 x float> %A, <4 x float> %B, i32 %C, <4 x float> %D, i8 %E, i32 %F)
More information about the llvm-commits
mailing list