[llvm] a682073 - Fixing upper lane return source for X86 intrinsics (#185329)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 23:58:59 PDT 2026
Author: Aaron Smull
Date: 2026-03-10T14:58:54+08:00
New Revision: a682073ae7a49de4b95498ba01b9ea32e6b5f607
URL: https://github.com/llvm/llvm-project/commit/a682073ae7a49de4b95498ba01b9ea32e6b5f607
DIFF: https://github.com/llvm/llvm-project/commit/a682073ae7a49de4b95498ba01b9ea32e6b5f607.diff
LOG: Fixing upper lane return source for X86 intrinsics (#185329)
Closes https://github.com/llvm/llvm-project/issues/184245
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX10.td
llvm/lib/Target/X86/X86IntrinsicsInfo.h
llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 6d388089cd5ca..6064c5f0bfd15 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -119,30 +119,30 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
(i32 timm:$src3)))]>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
- defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ defm rri : AVX512_maskable_scalar<0x53, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX]>;
- defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ defm rmi : AVX512_maskable_scalar<0x53, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
- defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ defm rrib : AVX512_maskable_scalar<0x53, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpStr, "$src3, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $src3",
(_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 timm:$src3))),
- 0, 0, 0, vselect_mask, "", "_Int">,
+ "_Int">,
Sched<[WriteFMAX]>, EVEX_B;
}
}
@@ -164,11 +164,11 @@ defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>
avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;
-defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", f64x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
-defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", f16x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
-defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
+defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", f32x_info, X86vminmaxs, X86vminmaxsSae>,
AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
//-------------------------------------------------
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index c0c98c1f35491..7c83275e8bad8 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -611,12 +611,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VMINMAX, 0),
X86_INTRINSIC_DATA(avx10_mask_vminmaxps256, INTR_TYPE_3OP_MASK_SAE,
X86ISD::VMINMAX, 0),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxsd_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxsh_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
- X86_INTRINSIC_DATA(avx10_mask_vminmaxss_round, INTR_TYPE_3OP_MASK_SAE,
- X86ISD::VMINMAXS, X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxsd_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxsh_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
+ X86_INTRINSIC_DATA(avx10_mask_vminmaxss_round,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE, X86ISD::VMINMAXS,
+ X86ISD::VMINMAXS_SAE),
X86_INTRINSIC_DATA(avx10_vcomisbf16eq, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(avx10_vcomisbf16ge, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(avx10_vcomisbf16gt, COMI, X86ISD::COMI, ISD::SETGT),
diff --git a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
index 8ae5b670764e2..9c4c4b78df641 100644
--- a/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2minmax-intrinsics.ll
@@ -561,6 +561,25 @@ define <2 x double>@test_int_x86_maskz_vminmaxsd_round(<2 x double> %A, <2 x dou
ret <2 x double> %ret
}
+define <2 x double> @test_upper_lane_passthrough_vminmaxsd_round(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxsd_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovsd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxsd_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovsd %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1]
+; X86-NEXT: retl # encoding: [0xc3]
+;; Constant false mask (i8 0): element 0 of the result comes from the passthrough operand, while the upper lanes come from the first source operand
+ %ret = call <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(<2 x double> %A, <2 x double> %B, i32 127, <2 x double> %C,
+ i8 0,
+ i32 8)
+ ret <2 x double> %ret
+}
+
declare<2 x double> @llvm.x86.avx10.mask.vminmaxsd.round(<2 x double> %A, <2 x double> %B, i32 %C, <2 x double> %D, i8 %E, i32 %F)
define <8 x half>@test_int_x86_vminmaxsh(<8 x half> %A, <8 x half> %B) nounwind {
@@ -659,6 +678,25 @@ define <8 x half>@test_int_x86_maskz_vminmaxsh_round(<8 x half> %A, <8 x half> %
ret <8 x half> %ret
}
+define <8 x half> @test_upper_lane_passthrough_vminmaxsh_round(<8 x half> %A, <8 x half> %B, <8 x half> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxsh_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovsh %xmm2, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxsh_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovsh %xmm2, %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
+; X86-NEXT: retl # encoding: [0xc3]
+;; Constant false mask (i8 0): element 0 of the result comes from the passthrough operand, while the upper lanes come from the first source operand
+ %ret = call <8 x half> @llvm.x86.avx10.mask.vminmaxsh.round(<8 x half> %A, <8 x half> %B, i32 127, <8 x half> %C,
+ i8 0,
+ i32 8)
+ ret <8 x half> %ret
+}
+
declare<8 x half> @llvm.x86.avx10.mask.vminmaxsh.round(<8 x half> %A, <8 x half> %B, i32 %C, <8 x half> %D, i8 %E, i32 %F)
define <4 x float>@test_int_x86_vminmaxss(<4 x float> %A, <4 x float> %B) nounwind {
@@ -757,4 +795,23 @@ define <4 x float>@test_int_x86_maskz_vminmaxss_round(<4 x float> %A, <4 x float
ret <4 x float> %ret
}
+define <4 x float> @test_upper_lane_passthrough_vminmaxss_round(<4 x float> %A, <4 x float> %B, <4 x float> %C) nounwind {
+; X64-LABEL: test_upper_lane_passthrough_vminmaxss_round:
+; X64: # %bb.0:
+; X64-NEXT: vmovss %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x10,0xc2]
+; X64-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_upper_lane_passthrough_vminmaxss_round:
+; X86: # %bb.0:
+; X86-NEXT: vmovss %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x10,0xc2]
+; X86-NEXT: # xmm0 = xmm2[0],xmm0[1,2,3]
+; X86-NEXT: retl # encoding: [0xc3]
+;; Constant false mask (i8 0): element 0 of the result comes from the passthrough operand, while the upper lanes come from the first source operand
+ %ret = call <4 x float> @llvm.x86.avx10.mask.vminmaxss.round(<4 x float> %A, <4 x float> %B, i32 0, <4 x float> %C,
+ i8 0,
+ i32 8)
+ ret <4 x float> %ret
+}
+
declare<4 x float> @llvm.x86.avx10.mask.vminmaxss.round(<4 x float> %A, <4 x float> %B, i32 %C, <4 x float> %D, i8 %E, i32 %F)
More information about the llvm-commits
mailing list