[llvm] 275658d - [SelectionDAG] Implicitly truncate known bits in SPLAT_VECTOR
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 6 07:44:00 PST 2023
Author: Luke Lau
Date: 2023-01-06T15:43:47Z
New Revision: 275658d1af11a2fbbbbad3d70629afc75b9e107c
URL: https://github.com/llvm/llvm-project/commit/275658d1af11a2fbbbbad3d70629afc75b9e107c
DIFF: https://github.com/llvm/llvm-project/commit/275658d1af11a2fbbbbad3d70629afc75b9e107c.diff
LOG: [SelectionDAG] Implicitly truncate known bits in SPLAT_VECTOR
Now that D139525 fixes the Hexagon infinite loop, the stopgap can be
removed. This provides more information about known bits in SPLAT_VECTORs
whose scalar operand is wider than the vector element bit width (which is
most of the time).
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D141075
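
As a rough illustration of the effect (a minimal sketch against the public
KnownBits API; the widths and the helper name are mine, not part of the
patch):

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Mirrors the AArch64 test changes below: the splatted i32 operand comes
    // from `and w9, w2, #0x1`, so its upper 31 bits are known zero.  Splatting
    // it into 16-bit elements now carries that knowledge through the implicit
    // truncation instead of giving up.
    KnownBits exampleSplatKnownBits() {
      KnownBits Src(32);
      Src.Zero.setHighBits(31); // operand is known to be 0 or 1
      return Src.trunc(16);     // per-element (i16) known bits: bits 15..1 zero
    }

Before this patch, computeKnownBits bailed out whenever the splat operand was
wider than the element type; with the truncation in place, the redundant
masking instructions in the AArch64 and WebAssembly tests below fall away.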
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/active_lane_mask.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
llvm/test/CodeGen/WebAssembly/pr59626.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e1e95f7004fd..3805f70df07a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2977,17 +2977,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Depth + 1);
case ISD::SPLAT_VECTOR: {
SDValue SrcOp = Op.getOperand(0);
- if (SrcOp.getValueSizeInBits() != BitWidth) {
- assert(SrcOp.getValueSizeInBits() > BitWidth &&
- "Expected SPLAT_VECTOR implicit truncation");
- // FIXME: We should be able to truncate the known bits here to match
- // the official semantics of SPLAT_VECTOR, but doing so exposes a
- // Hexagon target bug which results in an infinite loop during
- // DAGCombine. (See D137140 for repo). Once that's fixed, we can
- // strengthen this.
- break;
- }
- Known = computeKnownBits(SrcOp, Depth + 1);
+ assert(SrcOp.getValueSizeInBits() >= BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index cb2b498f274b..09cb0aa28b10 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -100,7 +100,6 @@ define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z1.h, w1
; CHECK-NEXT: umin z0.h, z0.h, #255
; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index 95c312db049e..553acc793098 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -40,7 +40,6 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -56,15 +55,14 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.h, w9
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
@@ -80,7 +78,6 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: mov z2.h, w8
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
@@ -101,7 +98,6 @@ define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -122,7 +118,6 @@ define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index e10a3531f8a0..19d7c4212842 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -40,7 +40,6 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -56,15 +55,14 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.b
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.b, w9
-; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1
; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0
; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b
; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b
@@ -80,7 +78,6 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.b
; VBITS_GE_512-NEXT: mov z2.b, w8
-; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1
; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
@@ -101,7 +98,6 @@ define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -122,7 +118,6 @@ define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -169,7 +164,6 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -185,15 +179,14 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.h, w9
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
@@ -209,7 +202,6 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: mov z2.h, w8
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
@@ -230,7 +222,6 @@ define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -251,7 +242,6 @@ define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
index 8b2455bff61e..8ef7b8032cc0 100644
--- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -49,15 +49,10 @@ define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: lsr z1.h, z2.h, #8
-; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsr z1.h, z0.h, #8
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
%b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
diff --git a/llvm/test/CodeGen/WebAssembly/pr59626.ll b/llvm/test/CodeGen/WebAssembly/pr59626.ll
index 33b85fb2de09..a2324b0b48a7 100644
--- a/llvm/test/CodeGen/WebAssembly/pr59626.ll
+++ b/llvm/test/CodeGen/WebAssembly/pr59626.ll
@@ -5,79 +5,38 @@
define i8 @f(ptr %0, ptr %1) {
; CHECK-32-LABEL: f:
; CHECK-32: .functype f (i32, i32) -> (i32)
-; CHECK-32-NEXT: .local v128
; CHECK-32-NEXT: # %bb.0: # %BB
; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store8 2
; CHECK-32-NEXT: local.get 0
-; CHECK-32-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-32-NEXT: v128.store16_lane 0, 0
-; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 5
-; CHECK-32-NEXT: v128.const 0, 0
-; CHECK-32-NEXT: i32x4.extract_lane 0
-; CHECK-32-NEXT: i8x16.splat
-; CHECK-32-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-; CHECK-32-NEXT: v128.and
-; CHECK-32-NEXT: local.tee 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 2
-; CHECK-32-NEXT: i32.div_u
-; CHECK-32-NEXT: i32.store8 2
+; CHECK-32-NEXT: i32.const 0
+; CHECK-32-NEXT: i32.store16 0
; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 1
-; CHECK-32-NEXT: local.get 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 0
-; CHECK-32-NEXT: local.tee 0
-; CHECK-32-NEXT: i32.const 1
-; CHECK-32-NEXT: i32.and
-; CHECK-32-NEXT: i32.div_u
+; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i8x16.splat
-; CHECK-32-NEXT: i32.const 3
-; CHECK-32-NEXT: local.get 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 1
-; CHECK-32-NEXT: i32.div_u
-; CHECK-32-NEXT: i8x16.replace_lane 1
; CHECK-32-NEXT: v128.store16_lane 0, 0
-; CHECK-32-NEXT: local.get 0
+; CHECK-32-NEXT: v128.const 0, 0
+; CHECK-32-NEXT: i32x4.extract_lane 0
; CHECK-32-NEXT: # fallthrough-return
;
; CHECK-64-LABEL: f:
; CHECK-64: .functype f (i64, i64) -> (i32)
-; CHECK-64-NEXT: .local v128, i32
+; CHECK-64-NEXT: .local i32
; CHECK-64-NEXT: # %bb.0: # %BB
; CHECK-64-NEXT: local.get 0
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store8 2
; CHECK-64-NEXT: local.get 0
-; CHECK-64-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-64-NEXT: v128.store16_lane 0, 0
-; CHECK-64-NEXT: drop
-; CHECK-64-NEXT: local.get 1
-; CHECK-64-NEXT: i32.const 5
-; CHECK-64-NEXT: v128.const 0, 0
-; CHECK-64-NEXT: i32x4.extract_lane 0
-; CHECK-64-NEXT: i8x16.splat
-; CHECK-64-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-; CHECK-64-NEXT: v128.and
-; CHECK-64-NEXT: local.tee 2
-; CHECK-64-NEXT: i8x16.extract_lane_u 2
-; CHECK-64-NEXT: i32.const 1
-; CHECK-64-NEXT: i32.and
-; CHECK-64-NEXT: i32.div_u
-; CHECK-64-NEXT: i32.store8 2
+; CHECK-64-NEXT: i32.const 0
+; CHECK-64-NEXT: i32.store16 0
; CHECK-64-NEXT: local.get 1
-; CHECK-64-NEXT: i32.const 1
; CHECK-64-NEXT: local.get 2
-; CHECK-64-NEXT: i8x16.extract_lane_u 0
-; CHECK-64-NEXT: local.tee 3
-; CHECK-64-NEXT: i32.const 1
-; CHECK-64-NEXT: i32.and
-; CHECK-64-NEXT: i32.div_u
; CHECK-64-NEXT: i8x16.splat
; CHECK-64-NEXT: v128.store16_lane 0, 0
; CHECK-64-NEXT: drop
-; CHECK-64-NEXT: local.get 3
+; CHECK-64-NEXT: v128.const 0, 0
+; CHECK-64-NEXT: i32x4.extract_lane 0
; CHECK-64-NEXT: # fallthrough-return
BB:
store <3 x i8> zeroinitializer, ptr %0