[llvm] 2fa6c52 - AMDGPU: Add baseline tests for simplify elts of readfirstlane (#128645)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 21:57:02 PST 2025


Author: Matt Arsenault
Date: 2025-02-28T12:56:58+07:00
New Revision: 2fa6c5265eda03925cef217f388a11a2a1616c54

URL: https://github.com/llvm/llvm-project/commit/2fa6c5265eda03925cef217f388a11a2a1616c54
DIFF: https://github.com/llvm/llvm-project/commit/2fa6c5265eda03925cef217f388a11a2a1616c54.diff

LOG: AMDGPU: Add baseline tests for simplify elts of readfirstlane (#128645)

Added: 
    llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
new file mode 100644
index 0000000000000..83d9d0d032ed1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s
+
+define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v1i16_readfirstlane(
+; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src)
+  %elt = extractelement <1 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+  %elt = extractelement <2 x i16> %vec, i32 1
+  ret i16 %elt
+}
+
+define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 0
+  ret i16 %elt
+}
+
+define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2
+; CHECK-NEXT:    ret i16 [[ELT]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %elt = extractelement <4 x i16> %vec, i32 2
+  ret i16 %elt
+}
+
+define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+  ret <2 x i16> %shuffle
+}
+
+define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 0
+  ret half %elt
+}
+
+define half @extract_elt1_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1
+; CHECK-NEXT:    ret half [[ELT]]
+;
+  %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src)
+  %elt = extractelement <2 x half> %vec, i32 1
+  ret half %elt
+}
+
+; Don't break on illegal types
+define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) {
+; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0
+; CHECK-NEXT:    ret i8 [[ELT]]
+;
+  %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4ii8(<4 x i8> %src)
+  %elt = extractelement <4 x i8> %vec, i32 0
+  ret i8 %elt
+}
+
+; Don't break on illegal types
+define i32 @extract_elt0_nxv4i32_readfirstlane(<vscale x 2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane(
+; CHECK-SAME: <vscale x 2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <vscale x 2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> %src)
+  %elt = extractelement <vscale x 2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src)
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) {
+; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(
+; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0
+; CHECK-NEXT:    ret ptr addrspace(3) [[ELT]]
+;
+  %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src)
+  %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0
+  ret ptr addrspace(3) %elt
+}
+
+define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 0
+  ret i64 %elt
+}
+
+define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1
+; CHECK-NEXT:    ret i64 [[ELT]]
+;
+  %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+  %elt = extractelement <2 x i64> %vec, i32 1
+  ret i64 %elt
+}
+
+define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt012_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i16> %shuffle
+}
+
+define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i16> %shuffle
+}
+
+define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x i32> %shuffle
+}
+
+define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+  ret <3 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i16> [[SHUFFLE]]
+;
+  %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+  %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i16> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 1
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT:    ret i32 [[ELT]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+  %elt = extractelement <2 x i32> %vec, i32 0
+  ret i32 %elt
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT:    [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %t = call token @llvm.experimental.convergence.entry()
+  %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+  %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) [ "convergencectrl"(token %t) ]
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define i1 @extract_elt0_v2i1_readfirstlane(<2 x i1> %src) {
+; CHECK-LABEL: define i1 @extract_elt0_v2i1_readfirstlane(
+; CHECK-SAME: <2 x i1> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> [[SRC]])
+; CHECK-NEXT:    [[ELT:%.*]] = extractelement <2 x i1> [[VEC]], i64 0
+; CHECK-NEXT:    ret i1 [[ELT]]
+;
+  %vec = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> %src)
+  %elt = extractelement <2 x i1> %vec, i32 0
+  ret i1 %elt
+}
+
+define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
+; CHECK-LABEL: define <2 x i1> @extract_elt01_v4i1_readfirstlane(
+; CHECK-SAME: <4 x i1> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i1> [[VEC]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    ret <2 x i1> [[SHUFFLE]]
+;
+  %vec = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> %src)
+  %shuffle = shufflevector <4 x i1> %vec, <4 x i1> poison, <2 x i32> <i32 0, i32 1>
+  ret <2 x i1> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
+; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
+  %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt03_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[SHUFFLE]]
+;
+  %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+  %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+  ret <2 x i32> %shuffle
+}
+
+define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
+; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
+; CHECK-NEXT:    ret <3 x i32> [[SHUFFLE]]
+;
+  %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
+  %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
+  ret <3 x i32> %shuffle
+}


        


More information about the llvm-commits mailing list