[llvm] fec4749 - [WebAssembly] Lower v2f32 to v2f64 extending loads with promote_low

Thomas Lively via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 1 10:27:47 PDT 2021


Author: Thomas Lively
Date: 2021-09-01T10:27:42-07:00
New Revision: fec4749200e0068494d8a6cd80b5b6f91403cfbe

URL: https://github.com/llvm/llvm-project/commit/fec4749200e0068494d8a6cd80b5b6f91403cfbe
DIFF: https://github.com/llvm/llvm-project/commit/fec4749200e0068494d8a6cd80b5b6f91403cfbe.diff

LOG: [WebAssembly] Lower v2f32 to v2f64 extending loads with promote_low

Previously, extra-wide v4f32 to v4f64 extending loads would be legalized to
v2f32 to v2f64 extending loads, which would then be scalarized during
legalization. (v2f32 to v2f64 extending loads not produced by legalization
were already being emitted correctly.) Instead, mark v2f32 to v2f64 extending
loads as legal and explicitly lower them using promote_low. This regresses the
addressing modes supported for the extloads not produced by legalization, but
that is a fine trade-off for now.
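
To illustrate (a minimal sketch mirroring the new simd-load-promote-wide.ll
test; the function name here is chosen for illustration only): a v4f32 to
v4f64 fpext of a load is split by legalization into two v2f32 to v2f64
extending loads, and with this change each half is selected as
v128.load64_zero followed by f64x2.promote_low_f32x4 rather than being
scalarized.

    ; Assumes wasm32 and -mattr=+simd128, as in the added test.
    target triple = "wasm32-unknown-unknown"

    define <4 x double> @wide_load_promote(<4 x float>* %p) {
      ; Legalization splits this into two v2f32 -> v2f64 extending loads;
      ; each half now lowers to v128.load64_zero + f64x2.promote_low_f32x4.
      %e = load <4 x float>, <4 x float>* %p
      %v = fpext <4 x float> %e to <4 x double>
      ret <4 x double> %v
    }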

Differential Revision: https://reviews.llvm.org/D108496

Added: 
    llvm/test/CodeGen/WebAssembly/simd-load-promote-wide.ll

Modified: 
    llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
    llvm/test/CodeGen/WebAssembly/simd-offset.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 75c7841fd655d..e816a76a4e516 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -303,6 +303,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
       setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
       setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
     }
+    setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
   }
 
   // Don't do anything clever with build_pairs

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index e743c24251633..551176fdd2336 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1288,6 +1288,19 @@ def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
 def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
 defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
 
+// Lower extending loads to load64_zero + promote_low
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let MemoryVT = v2f32;
+}
+// Adapted from the body of LoadPatNoOffset
+// TODO: other addressing patterns
+def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
+          (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
+      Requires<[HasAddr32]>;
+def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
+          (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
+      Requires<[HasAddr64]>;
+
 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
 //===----------------------------------------------------------------------===//

diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-promote-wide.ll b/llvm/test/CodeGen/WebAssembly/simd-load-promote-wide.ll
new file mode 100644
index 0000000000000..b88c1b45e2295
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-promote-wide.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+; Test wide load+promote patterns, which after combines and legalization are
+; represented differently than 128-bit load+promote patterns.
+
+target triple = "wasm32-unknown-unknown"
+
+define <4 x double> @load_promote_v2f64(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64:
+; CHECK:         .functype load_promote_v2f64 (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %e = load <4 x float>, <4 x float>* %p
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_with_folded_offset(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
+; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 24
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %q = ptrtoint <4 x float>* %p to i32
+  %r = add nuw i32 %q, 16
+  %s = inttoptr i32 %r to <4 x float>*
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_with_folded_gep_offset(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
+; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 24
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const -16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_with_unfolded_offset(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 24
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %q = ptrtoint <4 x float>* %p to i32
+  %r = add nsw i32 %q, 16
+  %s = inttoptr i32 %r to <4 x float>*
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_with_unfolded_gep_offset(<4 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32, i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 24
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr <4 x float>, <4 x float>* %p, i32 1
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+define <4 x double> @load_promote_v2f64_from_numeric_address() {
+; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
+; CHECK:         .functype load_promote_v2f64_from_numeric_address (i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 40
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 32
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %s = inttoptr i32 32 to <4 x float>*
+  %e = load <4 x float>, <4 x float>* %s
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}
+
+@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
+define <4 x double> @load_promote_v2f64_from_global_address() {
+; CHECK-LABEL: load_promote_v2f64_from_global_address:
+; CHECK:         .functype load_promote_v2f64_from_global_address (i32) -> ()
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const gv_v4f32
+; CHECK-NEXT:    i32.const 8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 16
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const gv_v4f32
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    v128.store 0
+; CHECK-NEXT:    # fallthrough-return
+  %e = load <4 x float>, <4 x float>* @gv_v4f32
+  %v = fpext <4 x float> %e to <4 x double>
+  ret <4 x double> %v
+}

diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
index 62d32b6be4f7a..0800893a00079 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll
@@ -2957,6 +2957,19 @@ define <2 x double> @load_splat_v2f64(double* %p) {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64:
+; CHECK:         .functype load_promote_v2f64 (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %e = load <2 x float>, <2 x float>* %p
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) {
 ; CHECK-LABEL: load_v2f64_with_folded_offset:
 ; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
@@ -2987,6 +3000,24 @@ define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_with_folded_offset(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
+; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %q = ptrtoint <2 x float>* %p to i32
+  %r = add nuw i32 %q, 16
+  %s = inttoptr i32 %r to <2 x float>*
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) {
 ; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
 ; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
@@ -3013,6 +3044,22 @@ define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_with_folded_gep_offset(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
+; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 1
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) {
 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
 ; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
@@ -3043,6 +3090,22 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double*
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const -8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr inbounds <2 x float>, <2 x float>* %p, i32 -1
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) {
 ; CHECK-LABEL: load_v2f64_with_unfolded_offset:
 ; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
@@ -3077,6 +3140,24 @@ define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_with_unfolded_offset(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 16
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %q = ptrtoint <2 x float>* %p to i32
+  %r = add nsw i32 %q, 16
+  %s = inttoptr i32 %r to <2 x float>*
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) {
 ; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
 ; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
@@ -3107,6 +3188,22 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(<2 x float>* %p) {
+; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
+; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    i32.const 8
+; CHECK-NEXT:    i32.add
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %s = getelementptr <2 x float>, <2 x float>* %p, i32 1
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define <2 x double> @load_v2f64_from_numeric_address() {
 ; CHECK-LABEL: load_v2f64_from_numeric_address:
 ; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
@@ -3133,6 +3230,20 @@ define <2 x double> @load_splat_v2f64_from_numeric_address() {
   ret <2 x double> %v2
 }
 
+define <2 x double> @load_promote_v2f64_from_numeric_address() {
+; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
+; CHECK:         .functype load_promote_v2f64_from_numeric_address () -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i32.const 32
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %s = inttoptr i32 32 to <2 x float>*
+  %e = load <2 x float>, <2 x float>* %s
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 @gv_v2f64 = global <2 x double> <double 42., double 42.>
 define <2 x double> @load_v2f64_from_global_address() {
 ; CHECK-LABEL: load_v2f64_from_global_address:
@@ -3159,6 +3270,20 @@ define <2 x double> @load_splat_v2f64_from_global_address() {
   ret <2 x double> %v2
 }
 
+@gv_v2f32 = global <2 x float> <float 42., float 42.>
+define <2 x double> @load_promote_v2f64_from_global_address() {
+; CHECK-LABEL: load_promote_v2f64_from_global_address:
+; CHECK:         .functype load_promote_v2f64_from_global_address () -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    i32.const gv_v2f32
+; CHECK-NEXT:    v128.load64_zero 0
+; CHECK-NEXT:    f64x2.promote_low_f32x4
+; CHECK-NEXT:    # fallthrough-return
+  %e = load <2 x float>, <2 x float>* @gv_v2f32
+  %v = fpext <2 x float> %e to <2 x double>
+  ret <2 x double> %v
+}
+
 define void @store_v2f64(<2 x double> %v, <2 x double>* %p) {
 ; CHECK-LABEL: store_v2f64:
 ; CHECK:         .functype store_v2f64 (v128, i32) -> ()


        

