[llvm] [AMDGPU][NFC] Add tests in preparation for i8 vectorization (PR #138801)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed May 7 11:02:05 PDT 2025


================
@@ -0,0 +1,1261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX9 %s
+
+define protected amdgpu_kernel void @arith_1(<16 x i8> %invec, ptr %out, i32 %flag) {
+; GFX7-LABEL: define protected amdgpu_kernel void @arith_1(
+; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX7-NEXT:  [[ENTRY:.*:]]
+; GFX7-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX7-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX7-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX7-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX7-NEXT:    store <16 x i8> [[VECINS0]], ptr [[OUT]], align 16
+; GFX7-NEXT:    ret void
+;
+; GFX8-LABEL: define protected amdgpu_kernel void @arith_1(
+; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX8-NEXT:  [[ENTRY:.*:]]
+; GFX8-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX8-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX8-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX8-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX8-NEXT:    store <16 x i8> [[VECINS0]], ptr [[OUT]], align 16
+; GFX8-NEXT:    ret void
+;
+; GFX9-LABEL: define protected amdgpu_kernel void @arith_1(
+; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX9-NEXT:  [[ENTRY:.*:]]
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX9-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX9-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX9-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX9-NEXT:    store <16 x i8> [[VECINS0]], ptr [[OUT]], align 16
+; GFX9-NEXT:    ret void
+;
+entry:
+  %el0 = extractelement <16 x i8> %invec, i64  0
+  %mul0 = mul i8 %el0, 1
+  %add0 = add i8 %mul0, 1
+  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
+  store <16 x i8> %vecins0, ptr %out
+  ret void
+}
+
+define protected amdgpu_kernel void @arith_2(<16 x i8> %invec, ptr %out, i32 %flag) {
+; GFX7-LABEL: define protected amdgpu_kernel void @arith_2(
+; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX7-NEXT:  [[ENTRY:.*:]]
+; GFX7-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX7-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX7-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX7-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX7-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX7-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX7-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX7-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX7-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX7-NEXT:    ret void
+;
+; GFX8-LABEL: define protected amdgpu_kernel void @arith_2(
+; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX8-NEXT:  [[ENTRY:.*:]]
+; GFX8-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX8-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX8-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX8-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX8-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX8-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX8-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX8-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX8-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX8-NEXT:    ret void
+;
+; GFX9-LABEL: define protected amdgpu_kernel void @arith_2(
+; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX9-NEXT:  [[ENTRY:.*:]]
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX9-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX9-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX9-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX9-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX9-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX9-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX9-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX9-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX9-NEXT:    ret void
+;
+entry:
+  %el0 = extractelement <16 x i8> %invec, i64  0
+  %el1 = extractelement <16 x i8> %invec, i64  1
+  %mul0 = mul i8 %el0, 1
+  %mul1 = mul i8 %el1, 1
+  %add0 = add i8 %mul0, 1
+  %add1 = add i8 %mul1, 1
+  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
+  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
+  store <16 x i8> %vecins1, ptr %out
+  ret void
+}
+
+define protected amdgpu_kernel void @arith_3(<16 x i8> %invec, ptr %out, i32 %flag) {
+; GFX7-LABEL: define protected amdgpu_kernel void @arith_3(
+; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX7-NEXT:  [[ENTRY:.*:]]
+; GFX7-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX7-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX7-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX7-NEXT:    [[MUL2:%.*]] = mul i8 [[EL0]], 1
+; GFX7-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX7-NEXT:    [[MUL3:%.*]] = mul i8 [[EL2]], 1
+; GFX7-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX7-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX7-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX7-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0
+; GFX7-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX7-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2
+; GFX7-NEXT:    store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
+; GFX7-NEXT:    ret void
+;
+; GFX8-LABEL: define protected amdgpu_kernel void @arith_3(
+; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX8-NEXT:  [[ENTRY:.*:]]
+; GFX8-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX8-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX8-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX8-NEXT:    [[MUL2:%.*]] = mul i8 [[EL0]], 1
+; GFX8-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX8-NEXT:    [[MUL3:%.*]] = mul i8 [[EL2]], 1
+; GFX8-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX8-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX8-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX8-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0
+; GFX8-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX8-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2
+; GFX8-NEXT:    store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
+; GFX8-NEXT:    ret void
+;
+; GFX9-LABEL: define protected amdgpu_kernel void @arith_3(
+; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX9-NEXT:  [[ENTRY:.*:]]
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX9-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX9-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX9-NEXT:    [[MUL2:%.*]] = mul i8 [[EL0]], 1
+; GFX9-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX9-NEXT:    [[MUL3:%.*]] = mul i8 [[EL2]], 1
+; GFX9-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX9-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX9-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX9-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0
+; GFX9-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX9-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2
+; GFX9-NEXT:    store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
+; GFX9-NEXT:    ret void
+;
+entry:
+  %el0 = extractelement <16 x i8> %invec, i64  0
+  %el1 = extractelement <16 x i8> %invec, i64  1
+  %el2 = extractelement <16 x i8> %invec, i64  2
+  %mul0 = mul i8 %el0, 1
+  %mul1 = mul i8 %el1, 1
+  %mul2 = mul i8 %el2, 1
+  %add0 = add i8 %mul0, 1
+  %add1 = add i8 %mul1, 1
+  %add2 = add i8 %mul2, 1
+  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
+  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
+  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
+  store <16 x i8> %vecins2, ptr %out
+  ret void
+}
+
+define protected amdgpu_kernel void @arith_4(<16 x i8> %invec, ptr %out, i32 %flag) {
+; GFX7-LABEL: define protected amdgpu_kernel void @arith_4(
+; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX7-NEXT:  [[ENTRY:.*:]]
+; GFX7-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX7-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX7-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX7-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX7-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX7-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX7-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX7-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX7-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX7-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX7-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX7-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX7-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX7-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX7-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX7-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX7-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX7-NEXT:    ret void
+;
+; GFX8-LABEL: define protected amdgpu_kernel void @arith_4(
+; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX8-NEXT:  [[ENTRY:.*:]]
+; GFX8-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX8-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX8-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX8-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX8-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX8-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX8-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX8-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX8-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX8-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX8-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX8-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX8-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX8-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX8-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX8-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX8-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX8-NEXT:    ret void
+;
+; GFX9-LABEL: define protected amdgpu_kernel void @arith_4(
+; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX9-NEXT:  [[ENTRY:.*:]]
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX9-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX9-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX9-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX9-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX9-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX9-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX9-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX9-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX9-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX9-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX9-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX9-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX9-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX9-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX9-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX9-NEXT:    store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
+; GFX9-NEXT:    ret void
+;
+entry:
+  %el0 = extractelement <16 x i8> %invec, i64  0
+  %el1 = extractelement <16 x i8> %invec, i64  1
+  %el2 = extractelement <16 x i8> %invec, i64  2
+  %el3 = extractelement <16 x i8> %invec, i64  3
+  %mul0 = mul i8 %el0, 1
+  %mul1 = mul i8 %el1, 1
+  %mul2 = mul i8 %el2, 1
+  %mul3 = mul i8 %el3, 1
+  %add0 = add i8 %mul0, 1
+  %add1 = add i8 %mul1, 1
+  %add2 = add i8 %mul2, 1
+  %add3 = add i8 %mul3, 1
+  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
+  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
+  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
+  %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3
+  store <16 x i8> %vecins3, ptr %out
+  ret void
+}
+
+define protected amdgpu_kernel void @arith_16(<16 x i8> %invec, ptr %out, i32 %flag) {
+; GFX7-LABEL: define protected amdgpu_kernel void @arith_16(
+; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX7-NEXT:  [[ENTRY:.*:]]
+; GFX7-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX7-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX7-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX7-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX7-NEXT:    [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
+; GFX7-NEXT:    [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
+; GFX7-NEXT:    [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
+; GFX7-NEXT:    [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
+; GFX7-NEXT:    [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
+; GFX7-NEXT:    [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
+; GFX7-NEXT:    [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
+; GFX7-NEXT:    [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
+; GFX7-NEXT:    [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
+; GFX7-NEXT:    [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
+; GFX7-NEXT:    [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
+; GFX7-NEXT:    [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
+; GFX7-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX7-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX7-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX7-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX7-NEXT:    [[MUL4:%.*]] = mul i8 [[EL4]], 1
+; GFX7-NEXT:    [[MUL5:%.*]] = mul i8 [[EL5]], 1
+; GFX7-NEXT:    [[MUL6:%.*]] = mul i8 [[EL6]], 1
+; GFX7-NEXT:    [[MUL7:%.*]] = mul i8 [[EL7]], 1
+; GFX7-NEXT:    [[MUL8:%.*]] = mul i8 [[EL8]], 1
+; GFX7-NEXT:    [[MUL9:%.*]] = mul i8 [[EL9]], 1
+; GFX7-NEXT:    [[MUL10:%.*]] = mul i8 [[EL10]], 1
+; GFX7-NEXT:    [[MUL11:%.*]] = mul i8 [[EL11]], 1
+; GFX7-NEXT:    [[MUL12:%.*]] = mul i8 [[EL12]], 1
+; GFX7-NEXT:    [[MUL13:%.*]] = mul i8 [[EL13]], 1
+; GFX7-NEXT:    [[MUL14:%.*]] = mul i8 [[EL14]], 1
+; GFX7-NEXT:    [[MUL15:%.*]] = mul i8 [[EL15]], 1
+; GFX7-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX7-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX7-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX7-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX7-NEXT:    [[ADD4:%.*]] = add i8 [[MUL4]], 1
+; GFX7-NEXT:    [[ADD5:%.*]] = add i8 [[MUL5]], 1
+; GFX7-NEXT:    [[ADD6:%.*]] = add i8 [[MUL6]], 1
+; GFX7-NEXT:    [[ADD7:%.*]] = add i8 [[MUL7]], 1
+; GFX7-NEXT:    [[ADD8:%.*]] = add i8 [[MUL8]], 1
+; GFX7-NEXT:    [[ADD9:%.*]] = add i8 [[MUL9]], 1
+; GFX7-NEXT:    [[ADD10:%.*]] = add i8 [[MUL10]], 1
+; GFX7-NEXT:    [[ADD11:%.*]] = add i8 [[MUL11]], 1
+; GFX7-NEXT:    [[ADD12:%.*]] = add i8 [[MUL12]], 1
+; GFX7-NEXT:    [[ADD13:%.*]] = add i8 [[MUL13]], 1
+; GFX7-NEXT:    [[ADD14:%.*]] = add i8 [[MUL14]], 1
+; GFX7-NEXT:    [[ADD15:%.*]] = add i8 [[MUL15]], 1
+; GFX7-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX7-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX7-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX7-NEXT:    [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX7-NEXT:    [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
+; GFX7-NEXT:    [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
+; GFX7-NEXT:    [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
+; GFX7-NEXT:    [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
+; GFX7-NEXT:    [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
+; GFX7-NEXT:    [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
+; GFX7-NEXT:    [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
+; GFX7-NEXT:    [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
+; GFX7-NEXT:    [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
+; GFX7-NEXT:    [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
+; GFX7-NEXT:    [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
+; GFX7-NEXT:    [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
+; GFX7-NEXT:    store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
+; GFX7-NEXT:    ret void
+;
+; GFX8-LABEL: define protected amdgpu_kernel void @arith_16(
+; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX8-NEXT:  [[ENTRY:.*:]]
+; GFX8-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX8-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX8-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX8-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX8-NEXT:    [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
+; GFX8-NEXT:    [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
+; GFX8-NEXT:    [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
+; GFX8-NEXT:    [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
+; GFX8-NEXT:    [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
+; GFX8-NEXT:    [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
+; GFX8-NEXT:    [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
+; GFX8-NEXT:    [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
+; GFX8-NEXT:    [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
+; GFX8-NEXT:    [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
+; GFX8-NEXT:    [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
+; GFX8-NEXT:    [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
+; GFX8-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX8-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX8-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX8-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX8-NEXT:    [[MUL4:%.*]] = mul i8 [[EL4]], 1
+; GFX8-NEXT:    [[MUL5:%.*]] = mul i8 [[EL5]], 1
+; GFX8-NEXT:    [[MUL6:%.*]] = mul i8 [[EL6]], 1
+; GFX8-NEXT:    [[MUL7:%.*]] = mul i8 [[EL7]], 1
+; GFX8-NEXT:    [[MUL8:%.*]] = mul i8 [[EL8]], 1
+; GFX8-NEXT:    [[MUL9:%.*]] = mul i8 [[EL9]], 1
+; GFX8-NEXT:    [[MUL10:%.*]] = mul i8 [[EL10]], 1
+; GFX8-NEXT:    [[MUL11:%.*]] = mul i8 [[EL11]], 1
+; GFX8-NEXT:    [[MUL12:%.*]] = mul i8 [[EL12]], 1
+; GFX8-NEXT:    [[MUL13:%.*]] = mul i8 [[EL13]], 1
+; GFX8-NEXT:    [[MUL14:%.*]] = mul i8 [[EL14]], 1
+; GFX8-NEXT:    [[MUL15:%.*]] = mul i8 [[EL15]], 1
+; GFX8-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX8-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX8-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX8-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX8-NEXT:    [[ADD4:%.*]] = add i8 [[MUL4]], 1
+; GFX8-NEXT:    [[ADD5:%.*]] = add i8 [[MUL5]], 1
+; GFX8-NEXT:    [[ADD6:%.*]] = add i8 [[MUL6]], 1
+; GFX8-NEXT:    [[ADD7:%.*]] = add i8 [[MUL7]], 1
+; GFX8-NEXT:    [[ADD8:%.*]] = add i8 [[MUL8]], 1
+; GFX8-NEXT:    [[ADD9:%.*]] = add i8 [[MUL9]], 1
+; GFX8-NEXT:    [[ADD10:%.*]] = add i8 [[MUL10]], 1
+; GFX8-NEXT:    [[ADD11:%.*]] = add i8 [[MUL11]], 1
+; GFX8-NEXT:    [[ADD12:%.*]] = add i8 [[MUL12]], 1
+; GFX8-NEXT:    [[ADD13:%.*]] = add i8 [[MUL13]], 1
+; GFX8-NEXT:    [[ADD14:%.*]] = add i8 [[MUL14]], 1
+; GFX8-NEXT:    [[ADD15:%.*]] = add i8 [[MUL15]], 1
+; GFX8-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX8-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX8-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX8-NEXT:    [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX8-NEXT:    [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
+; GFX8-NEXT:    [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
+; GFX8-NEXT:    [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
+; GFX8-NEXT:    [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
+; GFX8-NEXT:    [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
+; GFX8-NEXT:    [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
+; GFX8-NEXT:    [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
+; GFX8-NEXT:    [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
+; GFX8-NEXT:    [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
+; GFX8-NEXT:    [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
+; GFX8-NEXT:    [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
+; GFX8-NEXT:    [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
+; GFX8-NEXT:    store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
+; GFX8-NEXT:    ret void
+;
+; GFX9-LABEL: define protected amdgpu_kernel void @arith_16(
+; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
+; GFX9-NEXT:  [[ENTRY:.*:]]
+; GFX9-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
+; GFX9-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
+; GFX9-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
+; GFX9-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
+; GFX9-NEXT:    [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
+; GFX9-NEXT:    [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
+; GFX9-NEXT:    [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
+; GFX9-NEXT:    [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
+; GFX9-NEXT:    [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
+; GFX9-NEXT:    [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
+; GFX9-NEXT:    [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
+; GFX9-NEXT:    [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
+; GFX9-NEXT:    [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
+; GFX9-NEXT:    [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
+; GFX9-NEXT:    [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
+; GFX9-NEXT:    [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
+; GFX9-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
+; GFX9-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
+; GFX9-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
+; GFX9-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
+; GFX9-NEXT:    [[MUL4:%.*]] = mul i8 [[EL4]], 1
+; GFX9-NEXT:    [[MUL5:%.*]] = mul i8 [[EL5]], 1
+; GFX9-NEXT:    [[MUL6:%.*]] = mul i8 [[EL6]], 1
+; GFX9-NEXT:    [[MUL7:%.*]] = mul i8 [[EL7]], 1
+; GFX9-NEXT:    [[MUL8:%.*]] = mul i8 [[EL8]], 1
+; GFX9-NEXT:    [[MUL9:%.*]] = mul i8 [[EL9]], 1
+; GFX9-NEXT:    [[MUL10:%.*]] = mul i8 [[EL10]], 1
+; GFX9-NEXT:    [[MUL11:%.*]] = mul i8 [[EL11]], 1
+; GFX9-NEXT:    [[MUL12:%.*]] = mul i8 [[EL12]], 1
+; GFX9-NEXT:    [[MUL13:%.*]] = mul i8 [[EL13]], 1
+; GFX9-NEXT:    [[MUL14:%.*]] = mul i8 [[EL14]], 1
+; GFX9-NEXT:    [[MUL15:%.*]] = mul i8 [[EL15]], 1
+; GFX9-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
+; GFX9-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
+; GFX9-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
+; GFX9-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
+; GFX9-NEXT:    [[ADD4:%.*]] = add i8 [[MUL4]], 1
+; GFX9-NEXT:    [[ADD5:%.*]] = add i8 [[MUL5]], 1
+; GFX9-NEXT:    [[ADD6:%.*]] = add i8 [[MUL6]], 1
+; GFX9-NEXT:    [[ADD7:%.*]] = add i8 [[MUL7]], 1
+; GFX9-NEXT:    [[ADD8:%.*]] = add i8 [[MUL8]], 1
+; GFX9-NEXT:    [[ADD9:%.*]] = add i8 [[MUL9]], 1
+; GFX9-NEXT:    [[ADD10:%.*]] = add i8 [[MUL10]], 1
+; GFX9-NEXT:    [[ADD11:%.*]] = add i8 [[MUL11]], 1
+; GFX9-NEXT:    [[ADD12:%.*]] = add i8 [[MUL12]], 1
+; GFX9-NEXT:    [[ADD13:%.*]] = add i8 [[MUL13]], 1
+; GFX9-NEXT:    [[ADD14:%.*]] = add i8 [[MUL14]], 1
+; GFX9-NEXT:    [[ADD15:%.*]] = add i8 [[MUL15]], 1
+; GFX9-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
+; GFX9-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
+; GFX9-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
+; GFX9-NEXT:    [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
+; GFX9-NEXT:    [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
+; GFX9-NEXT:    [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
+; GFX9-NEXT:    [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
+; GFX9-NEXT:    [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
+; GFX9-NEXT:    [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
+; GFX9-NEXT:    [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
+; GFX9-NEXT:    [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
+; GFX9-NEXT:    [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
+; GFX9-NEXT:    [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
+; GFX9-NEXT:    [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
+; GFX9-NEXT:    [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
+; GFX9-NEXT:    [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
+; GFX9-NEXT:    store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
+; GFX9-NEXT:    ret void
+;
+entry:
+  %el0 = extractelement <16 x i8> %invec, i64  0
+  %el1 = extractelement <16 x i8> %invec, i64  1
+  %el2 = extractelement <16 x i8> %invec, i64  2
+  %el3 = extractelement <16 x i8> %invec, i64  3
+  %el4 = extractelement <16 x i8> %invec, i64  4
+  %el5 = extractelement <16 x i8> %invec, i64  5
+  %el6 = extractelement <16 x i8> %invec, i64  6
+  %el7 = extractelement <16 x i8> %invec, i64  7
+  %el8 = extractelement <16 x i8> %invec, i64  8
+  %el9 = extractelement <16 x i8> %invec, i64  9
+  %el10 = extractelement <16 x i8> %invec, i64 10
+  %el11 = extractelement <16 x i8> %invec, i64 11
+  %el12 = extractelement <16 x i8> %invec, i64 12
+  %el13 = extractelement <16 x i8> %invec, i64 13
+  %el14 = extractelement <16 x i8> %invec, i64 14
+  %el15 = extractelement <16 x i8> %invec, i64 15
+  %mul0 = mul i8 %el0, 1
+  %mul1 = mul i8 %el1, 1
+  %mul2 = mul i8 %el2, 1
+  %mul3 = mul i8 %el3, 1
+  %mul4 = mul i8 %el4, 1
+  %mul5 = mul i8 %el5, 1
+  %mul6 = mul i8 %el6, 1
+  %mul7 = mul i8 %el7, 1
+  %mul8 = mul i8 %el8, 1
+  %mul9 = mul i8 %el9, 1
+  %mul10 = mul i8 %el10, 1
+  %mul11 = mul i8 %el11, 1
+  %mul12 = mul i8 %el12, 1
+  %mul13 = mul i8 %el13, 1
+  %mul14 = mul i8 %el14, 1
+  %mul15 = mul i8 %el15, 1
+  %add0 = add i8 %mul0, 1
+  %add1 = add i8 %mul1, 1
+  %add2 = add i8 %mul2, 1
+  %add3 = add i8 %mul3, 1
+  %add4 = add i8 %mul4, 1
+  %add5 = add i8 %mul5, 1
+  %add6 = add i8 %mul6, 1
+  %add7 = add i8 %mul7, 1
+  %add8 = add i8 %mul8, 1
+  %add9 = add i8 %mul9, 1
+  %add10 = add i8 %mul10, 1
+  %add11 = add i8 %mul11, 1
+  %add12 = add i8 %mul12, 1
+  %add13 = add i8 %mul13, 1
+  %add14 = add i8 %mul14, 1
+  %add15 = add i8 %mul15, 1
+  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
+  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
+  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
+  %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3
+  %vecins4 = insertelement <16 x i8> %vecins3, i8 %add4, i64 4
+  %vecins5 = insertelement <16 x i8> %vecins4, i8 %add5, i64 5
+  %vecins6 = insertelement <16 x i8> %vecins5, i8 %add6, i64 6
+  %vecins7 = insertelement <16 x i8> %vecins6, i8 %add7, i64 7
+  %vecins8 = insertelement <16 x i8> %vecins7, i8 %add8, i64 8
+  %vecins9 = insertelement <16 x i8> %vecins8, i8 %add9, i64 9
+  %vecins10 = insertelement <16 x i8> %vecins9, i8 %add10, i64 10
+  %vecins11 = insertelement <16 x i8> %vecins10, i8 %add11, i64 11
+  %vecins12 = insertelement <16 x i8> %vecins11, i8 %add12, i64 12
+  %vecins13 = insertelement <16 x i8> %vecins12, i8 %add13, i64 13
+  %vecins14 = insertelement <16 x i8> %vecins13, i8 %add14, i64 14
+  %vecins15 = insertelement <16 x i8> %vecins14, i8 %add15, i64 15
+  store <16 x i8> %vecins15, ptr %out
+  ret void
+}
+
+define protected amdgpu_kernel void @phi_1(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) {
----------------
jrbyrnes wrote:

Ditto

https://github.com/llvm/llvm-project/pull/138801


More information about the llvm-commits mailing list