[llvm] [X86] Add missing scalar-load pattern for (V)CVTSS2SD blending with v2f64 pass through value (PR #126955)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 10:53:15 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
---
Full diff: https://github.com/llvm/llvm-project/pull/126955.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+6)
- (modified) llvm/lib/Target/X86/X86InstrSSE.td (+10)
- (modified) llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll (+12-48)
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9d8c123185a7c..f8e1553d07039 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7777,6 +7777,12 @@ def : Pat<(v2f64 (X86Movsd
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDZrm_Int VR128X:$dst, ssmem:$src)>,
+          Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 6aadb788c851e..71ca383d59767 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1425,6 +1425,11 @@ def : Pat<(v2f64 (X86Movsd
(f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
@@ -1479,6 +1484,11 @@ def : Pat<(v2f64 (X86Movsd
(f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
(CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128:$dst),
+                   (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (CVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index f6b0df153c260..0f4b1d915e78f 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -711,58 +711,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, ptr %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x08]
-; X86-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT: ## encoding: [0xf3,0x0f,0x10,0x0f]
-; X64-SSE-NEXT: cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX1-NEXT: ## encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX1-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX512-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX512-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX512-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX512-NEXT: ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <4 x float>, ptr %p1
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
@@ -774,46 +750,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE: ## %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT: cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
-; X86-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT: cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
; X86-SSE-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE: ## %bb.0:
-; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
-; X64-SSE-NEXT: movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT: cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
; X64-SSE-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1: ## %bb.0:
-; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512: ## %bb.0:
-; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
%a1 = load <4 x float>, ptr %p1
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
``````````
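For context, the IR shape these patterns match is the one exercised by the updated test: a scalar float load fed through `llvm.x86.sse2.cvtss2sd` with a `<2 x double>` pass-through operand. A minimal standalone reproducer, distilled from `test_x86_sse2_cvtss2sd_load` above (the exact llc invocation is an assumption, e.g. `llc -mtriple=x86_64 -mattr=+avx`):

```llvm
; Sketch distilled from the updated test: with the new patterns the float
; load is expected to fold into a single (v)cvtss2sd from memory instead of
; a separate movss + cvtss2sd + blend sequence.
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>)

define <2 x double> @cvtss2sd_load(<2 x double> %a0, ptr %p1) {
  %a1 = load <4 x float>, ptr %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
```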
https://github.com/llvm/llvm-project/pull/126955
More information about the llvm-commits mailing list