[llvm] 2e07adc - Scalarizer: precommit some tests

Nicolai Hähnle via llvm-commits llvm-commits at lists.llvm.org
Fri May 5 04:06:18 PDT 2023


Author: Nicolai Hähnle
Date: 2023-05-05T13:05:31+02:00
New Revision: 2e07adcd3a2acdef80a0b750bd60065cccd924c2

URL: https://github.com/llvm/llvm-project/commit/2e07adcd3a2acdef80a0b750bd60065cccd924c2
DIFF: https://github.com/llvm/llvm-project/commit/2e07adcd3a2acdef80a0b750bd60065cccd924c2.diff

LOG: Scalarizer: precommit some tests

The tests are pulled out from https://reviews.llvm.org/D149842 as a
preparatory change.

Differential Revision: https://reviews.llvm.org/D149945

Added: 
    llvm/test/Transforms/Scalarizer/min-bits.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
new file mode 100644
index 000000000000..1ad5fe37e8ca
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -0,0 +1,793 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s --check-prefixes=CHECK
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
+; CHECK-LABEL: @load_add_store_v2i16(
+; CHECK-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
+; CHECK-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
+; CHECK-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
+; CHECK-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
+; CHECK-NEXT:    ret void
+; Input: load <2 x i16> from %pa and %pb (align 8), add, store the sum to %pa; expected element alignments are 8 and 2.
+  %a = load <2 x i16>, ptr %pa, align 8
+  %b = load <2 x i16>, ptr %pb, align 8
+  %c = add <2 x i16> %a, %b
+  store <2 x i16> %c, ptr %pa, align 8
+  ret void
+}
+
+define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
+; CHECK-LABEL: @load_add_store_v3i16(
+; CHECK-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
+; CHECK-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
+; CHECK-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
+; CHECK-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
+; CHECK-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
+; CHECK-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
+; CHECK-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
+; CHECK-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
+; CHECK-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
+; CHECK-NEXT:    ret void
+; Input: load <3 x i16> from %pa and %pb (align 8), add, store the sum to %pa; expected element alignments are 8, 2 and 4.
+  %a = load <3 x i16>, ptr %pa, align 8
+  %b = load <3 x i16>, ptr %pb, align 8
+  %c = add <3 x i16> %a, %b
+  store <3 x i16> %c, ptr %pa, align 8
+  ret void
+}
+
+define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
+; CHECK-LABEL: @load_add_store_v4i16(
+; CHECK-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
+; CHECK-NEXT:    [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
+; CHECK-NEXT:    [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
+; CHECK-NEXT:    [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
+; CHECK-NEXT:    [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2
+; CHECK-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
+; CHECK-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
+; CHECK-NEXT:    [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
+; CHECK-NEXT:    [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2
+; CHECK-NEXT:    [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
+; CHECK-NEXT:    [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]]
+; CHECK-NEXT:    store i16 [[C_I0]], ptr [[PA]], align 8
+; CHECK-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 2
+; CHECK-NEXT:    store i16 [[C_I2]], ptr [[PA_I2]], align 4
+; CHECK-NEXT:    store i16 [[C_I3]], ptr [[PA_I3]], align 2
+; CHECK-NEXT:    ret void
+; Input: load <4 x i16> from %pa and %pb (align 8), add, store the sum to %pa; expected element alignments are 8, 2, 4 and 2.
+  %a = load <4 x i16>, ptr %pa, align 8
+  %b = load <4 x i16>, ptr %pb, align 8
+  %c = add <4 x i16> %a, %b
+  store <4 x i16> %c, ptr %pa, align 8
+  ret void
+}
+
+define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) {
+; CHECK-LABEL: @select_uniform_condition_v2f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[R]]
+; Input: select between two <2 x half> values on one scalar i1; every expected per-lane select reuses that same i1.
+  %r = select i1 %cc, <2 x half> %a, <2 x half> %b
+  ret <2 x half> %r
+}
+
+define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) {
+; CHECK-LABEL: @select_uniform_condition_v3f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    ret <3 x half> [[R]]
+; Input: select between two <3 x half> values on one scalar i1; every expected per-lane select reuses that same i1.
+  %r = select i1 %cc, <3 x half> %a, <3 x half> %b
+  ret <3 x half> %r
+}
+
+define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) {
+; CHECK-LABEL: @select_uniform_condition_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x half> [[R]]
+; Input: select between two <4 x half> values on one scalar i1; every expected per-lane select reuses that same i1.
+  %r = select i1 %cc, <4 x half> %a, <4 x half> %b
+  ret <4 x half> %r
+}
+
+define <4 x half> @select_vector_condition_v4f16(<4 x half> %a, <4 x half> %b, <4 x i1> %cc) {
+; CHECK-LABEL: @select_vector_condition_v4f16(
+; CHECK-NEXT:    [[CC_I0:%.*]] = extractelement <4 x i1> [[CC:%.*]], i64 0
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = select i1 [[CC_I0]], half [[A_I0]], half [[B_I0]]
+; CHECK-NEXT:    [[CC_I1:%.*]] = extractelement <4 x i1> [[CC]], i64 1
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = select i1 [[CC_I1]], half [[A_I1]], half [[B_I1]]
+; CHECK-NEXT:    [[CC_I2:%.*]] = extractelement <4 x i1> [[CC]], i64 2
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = select i1 [[CC_I2]], half [[A_I2]], half [[B_I2]]
+; CHECK-NEXT:    [[CC_I3:%.*]] = extractelement <4 x i1> [[CC]], i64 3
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = select i1 [[CC_I3]], half [[A_I3]], half [[B_I3]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x half> [[R]]
+; Input: select on <4 x half> with a <4 x i1> condition; the expected output extracts one condition bit per lane.
+  %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b
+  ret <4 x half> %r
+}
+
+define <2 x half> @unary_v2f16(<2 x half> %a) {
+; CHECK-LABEL: @unary_v2f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[R]]
+; Input: fneg on <2 x half>; expected output is one scalar fneg per lane, reassembled with insertelement.
+  %r = fneg <2 x half> %a
+  ret <2 x half> %r
+}
+
+define <3 x half> @unary_v3f16(<3 x half> %a) {
+; CHECK-LABEL: @unary_v3f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    ret <3 x half> [[R]]
+; Input: fneg on <3 x half>; expected output is one scalar fneg per lane, reassembled with insertelement.
+  %r = fneg <3 x half> %a
+  ret <3 x half> %r
+}
+
+define <4 x half> @unary_v4f16(<4 x half> %a) {
+; CHECK-LABEL: @unary_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fneg half [[A_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fneg half [[A_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fneg half [[A_I2]]
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = fneg half [[A_I3]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x half> [[R]]
+; Input: fneg on <4 x half>; expected output is one scalar fneg per lane, reassembled with insertelement.
+  %r = fneg <4 x half> %a
+  ret <4 x half> %r
+}
+
+define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: @binary_v2f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[R]]
+; Input: fadd of two <2 x half> values; expected output is one scalar fadd per lane, reassembled with insertelement.
+  %r = fadd <2 x half> %a, %b
+  ret <2 x half> %r
+}
+
+define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
+; CHECK-LABEL: @binary_v3f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    ret <3 x half> [[R]]
+; Input: fadd of two <3 x half> values; expected output is one scalar fadd per lane, reassembled with insertelement.
+  %r = fadd <3 x half> %a, %b
+  ret <3 x half> %r
+}
+
+define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: @binary_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = fadd half [[A_I3]], [[B_I3]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x half> [[R]]
+; Input: fadd of two <4 x half> values; expected output is one scalar fadd per lane, reassembled with insertelement.
+  %r = fadd <4 x half> %a, %b
+  ret <4 x half> %r
+}
+
+define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
+; CHECK-LABEL: @fptosi_v2f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+; Input: fptosi from <2 x half> to <2 x i16>; expected output converts each lane with a scalar half-to-i16 fptosi.
+  %r = fptosi <2 x half> %a to <2 x i16>
+  ret <2 x i16> %r
+}
+
+define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
+; CHECK-LABEL: @fptosi_v3f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
+; CHECK-NEXT:    ret <3 x i16> [[R]]
+; Input: fptosi from <3 x half> to <3 x i16>; expected output converts each lane with a scalar half-to-i16 fptosi.
+  %r = fptosi <3 x half> %a to <3 x i16>
+  ret <3 x i16> %r
+}
+
+define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
+; CHECK-LABEL: @fptosi_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = fptosi half [[A_I3]] to i16
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x i16> [[R]]
+; Input: fptosi from <4 x half> to <4 x i16>; expected output converts each lane with a scalar half-to-i16 fptosi.
+  %r = fptosi <4 x half> %a to <4 x i16>
+  ret <4 x i16> %r
+}
+
+define <4 x float> @fpext_v4f16(<4 x half> %a) {
+; CHECK-LABEL: @fpext_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = fpext half [[A_I0]] to float
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = fpext half [[A_I1]] to float
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = fpext half [[A_I2]] to float
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = fpext half [[A_I3]] to float
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x float> [[R]]
+; Input: fpext from <4 x half> to <4 x float>, so source and result element types differ; expected output extends each lane separately.
+  %r = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %r
+}
+
+define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: @icmp_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <4 x i16> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = icmp ugt i16 [[A_I0]], [[B_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <4 x i16> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = icmp ugt i16 [[A_I1]], [[B_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <4 x i16> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = icmp ugt i16 [[A_I2]], [[B_I2]]
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
+; CHECK-NEXT:    [[B_I3:%.*]] = extractelement <4 x i16> [[B]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = icmp ugt i16 [[A_I3]], [[B_I3]]
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i1> poison, i1 [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i1> [[R_UPTO0]], i1 [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i1> [[R_UPTO1]], i1 [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> [[R_UPTO2]], i1 [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x i1> [[R]]
+; Input: icmp ugt on <4 x i16>, producing <4 x i1>. NOTE(review): the function name says f16 but the operands are i16.
+  %r = icmp ugt <4 x i16> %a, %b
+  ret <4 x i1> %r
+}
+
+define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
+; CHECK-LABEL: @gep1_v4(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE:%.*]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLAT_I0:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 0
+; CHECK-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I0]], i16 [[A_I0]]
+; CHECK-NEXT:    [[DOTSPLAT_I1:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 1
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I1]], i16 [[A_I1]]
+; CHECK-NEXT:    [[DOTSPLAT_I2:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 2
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I2]], i16 [[A_I2]]
+; CHECK-NEXT:    [[DOTSPLAT_I3:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i64 3
+; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[DOTSPLAT_I3]], i16 [[A_I3]]
+; CHECK-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
+; CHECK-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
+; CHECK-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
+; CHECK-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
+; CHECK-NEXT:    ret <4 x ptr> [[P]]
+; Input: GEP with a scalar base pointer and <4 x i16> indices; the expected output splats the base into a vector and extracts it per lane.
+  %p = getelementptr i32, ptr %base, <4 x i16> %a
+  ret <4 x ptr> %p
+}
+
+define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) {
+; CHECK-LABEL: @gep2_v4(
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i16> [[DOTSPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLAT_I0:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 0
+; CHECK-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
+; CHECK-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[DOTSPLAT_I0]]
+; CHECK-NEXT:    [[DOTSPLAT_I1:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 1
+; CHECK-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[DOTSPLAT_I1]]
+; CHECK-NEXT:    [[DOTSPLAT_I2:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 2
+; CHECK-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[DOTSPLAT_I2]]
+; CHECK-NEXT:    [[DOTSPLAT_I3:%.*]] = extractelement <4 x i16> [[DOTSPLAT]], i64 3
+; CHECK-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
+; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[DOTSPLAT_I3]]
+; CHECK-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
+; CHECK-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
+; CHECK-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
+; CHECK-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
+; CHECK-NEXT:    ret <4 x ptr> [[P]]
+; Input: GEP with <4 x ptr> bases and a scalar i16 index; the expected output splats the index into a vector and extracts it per lane.
+  %p = getelementptr i32, <4 x ptr> %base, i16 %a
+  ret <4 x ptr> %p
+}
+
+define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
+; CHECK-LABEL: @gep3_v4(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
+; CHECK-NEXT:    [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]]
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
+; CHECK-NEXT:    [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]]
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
+; CHECK-NEXT:    [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]]
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
+; CHECK-NEXT:    [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
+; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]]
+; CHECK-NEXT:    [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
+; CHECK-NEXT:    [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
+; CHECK-NEXT:    [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
+; CHECK-NEXT:    [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
+; CHECK-NEXT:    ret <4 x ptr> [[P]]
+; Input: GEP where both the base and the index are 4-element vectors; expected output is one scalar GEP per lane with no splat.
+  %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
+  ret <4 x ptr> %p
+}
+
+define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
+; CHECK-LABEL: @insertelement_v2i16(
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    store i16 [[A_I0]], ptr [[P]], align 4
+; CHECK-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 2
+; CHECK-NEXT:    ret void
+; Input: insert %b into lane 1 of %a, then store with no explicit alignment; expected stores write %b directly to lane 1 (align 4, 2).
+  %r = insertelement <2 x i16> %a, i16 %b, i64 1
+  store <2 x i16> %r, ptr %p
+  ret void
+}
+
+define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
+; CHECK-LABEL: @insertelement_v3i16(
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1
+; CHECK-NEXT:    store i16 [[A_I0]], ptr [[P]], align 8
+; CHECK-NEXT:    store i16 [[A_I1]], ptr [[P_I1]], align 2
+; CHECK-NEXT:    store i16 [[B:%.*]], ptr [[P_I2]], align 4
+; CHECK-NEXT:    ret void
+; Input: insert %b into lane 2 of %a, then store with no explicit alignment; expected stores write %b directly to lane 2 (align 8, 2, 4).
+  %r = insertelement <3 x i16> %a, i16 %b, i64 2
+  store <3 x i16> %r, ptr %p
+  ret void
+}
+
+define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) {
+; CHECK-LABEL: @insertelement_v4i16(
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
+; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
+; CHECK-NEXT:    store i16 [[A_I0]], ptr [[P]], align 8
+; CHECK-NEXT:    store i16 [[A_I1]], ptr [[P_I1]], align 2
+; CHECK-NEXT:    store i16 [[A_I2]], ptr [[P_I2]], align 4
+; CHECK-NEXT:    store i16 [[B:%.*]], ptr [[P_I3]], align 2
+; CHECK-NEXT:    ret void
+; Input: insert %b into lane 3 of %a, then store with no explicit alignment; expected stores write %b directly to lane 3 (align 8, 2, 4, 2).
+  %r = insertelement <4 x i16> %a, i16 %b, i64 3
+  store <4 x i16> %r, ptr %p
+  ret void
+}
+
+define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) {
+; CHECK-LABEL: @load_insertelement_v2i16(
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+; Input: load <2 x i16> (no explicit alignment) and overwrite lane 1 with %b; only lane 0 is expected to be loaded (align 4).
+  %a = load <2 x i16>, ptr %pa
+  %r = insertelement <2 x i16> %a, i16 %b, i64 1
+  ret <2 x i16> %r
+}
+
+define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
+; CHECK-LABEL: @load_insertelement_v3i16(
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2
+; CHECK-NEXT:    ret <3 x i16> [[R]]
+; Input: load <3 x i16> (no explicit alignment) and overwrite lane 2 with %b; only lanes 0 and 1 are expected to be loaded (align 8, 2).
+  %a = load <3 x i16>, ptr %pa
+  %r = insertelement <3 x i16> %a, i16 %b, i64 2
+  ret <3 x i16> %r
+}
+
+define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
+; CHECK-LABEL: @load_insertelement_v4i16(
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3
+; CHECK-NEXT:    ret <4 x i16> [[R]]
+; Input: load <4 x i16> (no explicit alignment) and overwrite lane 3 with %b; only lanes 0-2 are expected to be loaded (align 8, 2, 4).
+  %a = load <4 x i16>, ptr %pa
+  %r = insertelement <4 x i16> %a, i16 %b, i64 3
+  ret <4 x i16> %r
+}
+
+define void @shufflevector_grow(ptr %pa, ptr %pb) {
+; CHECK-LABEL: @shufflevector_grow(
+; CHECK-NEXT:    [[PA_I11:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1
+; CHECK-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
+; CHECK-NEXT:    [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
+; CHECK-NEXT:    [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
+; CHECK-NEXT:    [[A_I0:%.*]] = load i16, ptr [[PA]], align 4
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[B_I0:%.*]] = load i16, ptr [[PB]], align 4
+; CHECK-NEXT:    [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
+; CHECK-NEXT:    store i16 [[A_I0]], ptr [[PA]], align 8
+; CHECK-NEXT:    store i16 [[A_I1]], ptr [[PA_I11]], align 2
+; CHECK-NEXT:    store i16 [[B_I0]], ptr [[PA_I2]], align 4
+; CHECK-NEXT:    store i16 [[B_I1]], ptr [[PA_I3]], align 2
+; CHECK-NEXT:    ret void
+; Input: concatenate two loaded <2 x i16> into <4 x i16> via an identity mask and store to %pa. The expected output has two identical GEPs for element 1 of %pa.
+  %a = load <2 x i16>, ptr %pa
+  %b = load <2 x i16>, ptr %pb
+  %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  store <4 x i16> %r, ptr %pa
+  ret void
+}
+
+define void @shufflevector_shrink(ptr %pa) {
+; CHECK-LABEL: @shufflevector_shrink(
+; CHECK-NEXT:    [[PA_I11:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1
+; CHECK-NEXT:    [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
+; CHECK-NEXT:    [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
+; CHECK-NEXT:    store i16 [[A_I1]], ptr [[PA]], align 4
+; CHECK-NEXT:    store i16 [[A_I2]], ptr [[PA_I11]], align 2
+; CHECK-NEXT:    ret void
+; Input: keep lanes 1 and 2 of a <4 x i16> load as a <2 x i16> and store it to %pa; the checks above expect only those two lanes to be loaded.
+  %a = load <4 x i16>, ptr %pa
+  %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2> ; mask picks a[1], a[2]; the poison operand is never selected
+  store <2 x i16> %r, ptr %pa
+  ret void
+}
+
+define void @phi_v2f16(ptr %base, i64 %bound) {
+; CHECK-LABEL: @phi_v2f16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
+; CHECK-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
+; CHECK-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; CHECK-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 4
+; CHECK-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
+; CHECK-NEXT:    ret void
+; Input: loop that sums <2 x half> values loaded from %base through a vector phi; the checks above expect one half phi per lane.
+entry:
+  br label %loop
+
+loop:
+  %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] ; running sum, zero on entry
+  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+  %p = getelementptr <2 x half>, ptr %base, i64 %idx ; address of the %idx-th <2 x half> element
+  %a = load <2 x half>, ptr %p, align 2
+  %x.next = fadd <2 x half> %x, %a
+  %idx.next = add i64 %idx, 1
+  %cc = icmp ult i64 %idx.next, %bound
+  br i1 %cc, label %loop, label %end
+
+end:
+  store <2 x half> %x.next, ptr %base ; the loop-carried vector is also used after the loop
+  ret void
+}
+
+define void @phi_v3f16(ptr %base, i64 %bound) {
+; CHECK-LABEL: @phi_v3f16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
+; CHECK-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
+; CHECK-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
+; CHECK-NEXT:    [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; CHECK-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 8
+; CHECK-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
+; CHECK-NEXT:    store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
+; CHECK-NEXT:    ret void
+; Input: same accumulation loop as phi_v2f16 but with an odd-sized <3 x half> vector; the checks above expect three half phis.
+entry:
+  br label %loop
+
+loop:
+  %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] ; running sum, zero on entry
+  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+  %p = getelementptr <3 x half>, ptr %base, i64 %idx ; address of the %idx-th <3 x half> element
+  %a = load <3 x half>, ptr %p, align 2
+  %x.next = fadd <3 x half> %x, %a
+  %idx.next = add i64 %idx, 1
+  %cc = icmp ult i64 %idx.next, %bound
+  br i1 %cc, label %loop, label %end
+
+end:
+  store <3 x half> %x.next, ptr %base ; the loop-carried vector is also used after the loop
+  ret void
+}
+
+define void @phi_v4f16(ptr %base, i64 %bound) {
+; CHECK-LABEL: @phi_v4f16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
+; CHECK-NEXT:    [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]]
+; CHECK-NEXT:    [[A_I0:%.*]] = load half, ptr [[P]], align 2
+; CHECK-NEXT:    [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
+; CHECK-NEXT:    [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
+; CHECK-NEXT:    [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
+; CHECK-NEXT:    [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
+; CHECK-NEXT:    [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3
+; CHECK-NEXT:    [[A_I3:%.*]] = load half, ptr [[P_I3]], align 2
+; CHECK-NEXT:    [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
+; CHECK-NEXT:    [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
+; CHECK-NEXT:    [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
+; CHECK-NEXT:    [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]]
+; CHECK-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; CHECK-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; CHECK-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    store half [[X_NEXT_I0]], ptr [[BASE]], align 8
+; CHECK-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
+; CHECK-NEXT:    store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
+; CHECK-NEXT:    store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2
+; CHECK-NEXT:    ret void
+; Input: same accumulation loop as phi_v2f16 but with a <4 x half> vector; the checks above expect four half phis.
+entry:
+  br label %loop
+
+loop:
+  %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ] ; running sum, zero on entry
+  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+  %p = getelementptr <4 x half>, ptr %base, i64 %idx ; address of the %idx-th <4 x half> element
+  %a = load <4 x half>, ptr %p, align 2
+  %x.next = fadd <4 x half> %x, %a
+  %idx.next = add i64 %idx, 1
+  %cc = icmp ult i64 %idx.next, %bound
+  br i1 %cc, label %loop, label %end
+
+end:
+  store <4 x half> %x.next, ptr %base ; the loop-carried vector is also used after the loop
+  ret void
+}
+
+define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: @call_v2f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[R]]
+; Input: one <2 x half> llvm.minnum call; the checks above expect one llvm.minnum.f16 call per lane and the result rebuilt with insertelement.
+  %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
+  ret <2 x half> %r
+}
+
+define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
+; CHECK-LABEL: @call_v3f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    ret <3 x half> [[R]]
+; Input: one <3 x half> llvm.minnum call; the checks above expect three llvm.minnum.f16 calls and the result rebuilt with insertelement.
+  %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
+  ret <3 x half> %r
+}
+
+define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: @call_v4f16(
+; CHECK-NEXT:    [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
+; CHECK-NEXT:    [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
+; CHECK-NEXT:    [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
+; CHECK-NEXT:    [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
+; CHECK-NEXT:    [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
+; CHECK-NEXT:    [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
+; CHECK-NEXT:    [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
+; CHECK-NEXT:    [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
+; CHECK-NEXT:    [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
+; CHECK-NEXT:    [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]])
+; CHECK-NEXT:    [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
+; CHECK-NEXT:    [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
+; CHECK-NEXT:    [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
+; CHECK-NEXT:    ret <4 x half> [[R]]
+; Input: one <4 x half> llvm.minnum call; the checks above expect four llvm.minnum.f16 calls and the result rebuilt with insertelement.
+  %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
+  ret <4 x half> %r
+}
+
+declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) ; called by @call_v2f16
+declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) ; called by @call_v3f16
+declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) ; called by @call_v4f16


        


More information about the llvm-commits mailing list