[llvm] AMDGPU: Add baseline tests for simplify elts of readfirstlane (PR #128645)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 23:42:57 PST 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/128645
None
>From fe3305757dced527296621a39a679dd2594b9045 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 25 Feb 2025 12:08:44 +0700
Subject: [PATCH] AMDGPU: Add baseline tests for simplify elts of readfirstlane
---
...fy-demanded-vector-elts-lane-intrinsics.ll | 444 ++++++++++++++++++
1 file changed, 444 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
new file mode 100644
index 0000000000000..83d9d0d032ed1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=instcombine < %s | FileCheck %s
+
+define i16 @extract_elt0_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 0
+; CHECK-NEXT: ret i16 [[ELT]]
+;
+ %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+ %elt = extractelement <2 x i16> %vec, i32 0
+ ret i16 %elt
+}
+
+define i16 @extract_elt0_v1i16_readfirstlane(<1 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v1i16_readfirstlane(
+; CHECK-SAME: <1 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <1 x i16> [[VEC]], i64 0
+; CHECK-NEXT: ret i16 [[ELT]]
+;
+ %vec = call <1 x i16> @llvm.amdgcn.readfirstlane.v1i16(<1 x i16> %src)
+ %elt = extractelement <1 x i16> %vec, i32 0
+ ret i16 %elt
+}
+
+define i16 @extract_elt1_v2i16_readfirstlane(<2 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt1_v2i16_readfirstlane(
+; CHECK-SAME: <2 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i16> [[VEC]], i64 1
+; CHECK-NEXT: ret i16 [[ELT]]
+;
+ %vec = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> %src)
+ %elt = extractelement <2 x i16> %vec, i32 1
+ ret i16 %elt
+}
+
+define i16 @extract_elt0_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt0_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 0
+; CHECK-NEXT: ret i16 [[ELT]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %elt = extractelement <4 x i16> %vec, i32 0
+ ret i16 %elt
+}
+
+define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define i16 @extract_elt2_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i16> [[VEC]], i64 2
+; CHECK-NEXT: ret i16 [[ELT]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %elt = extractelement <4 x i16> %vec, i32 2
+ ret i16 %elt
+}
+
+define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 0>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 2>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i16> @extract_elt30_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt30_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 3, i32 0>
+ ret <2 x i16> %shuffle
+}
+
+define half @extract_elt0_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt0_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 0
+; CHECK-NEXT: ret half [[ELT]]
+;
+ %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src)
+ %elt = extractelement <2 x half> %vec, i32 0
+ ret half %elt
+}
+
+define half @extract_elt1_v2f16_readfirstlane(<2 x half> %src) {
+; CHECK-LABEL: define half @extract_elt1_v2f16_readfirstlane(
+; CHECK-SAME: <2 x half> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x half> [[VEC]], i64 1
+; CHECK-NEXT: ret half [[ELT]]
+;
+ %vec = call <2 x half> @llvm.amdgcn.readfirstlane.v2i16(<2 x half> %src)
+ %elt = extractelement <2 x half> %vec, i32 1
+ ret half %elt
+}
+
+; Don't break on illegal types
+define i8 @extract_elt0_v4i8_readfirstlane(<4 x i8> %src) {
+; CHECK-LABEL: define i8 @extract_elt0_v4i8_readfirstlane(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i8> @llvm.amdgcn.readfirstlane.v4i8(<4 x i8> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x i8> [[VEC]], i64 0
+; CHECK-NEXT: ret i8 [[ELT]]
+;
+ %vec = call <4 x i8> @llvm.amdgcn.readfirstlane.v4ii8(<4 x i8> %src)
+ %elt = extractelement <4 x i8> %vec, i32 0
+ ret i8 %elt
+}
+
+; Don't break on illegal types
+define i32 @extract_elt0_nxv4i32_readfirstlane(<vscale x 2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_nxv4i32_readfirstlane(
+; CHECK-SAME: <vscale x 2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 2 x i32> [[VEC]], i64 0
+; CHECK-NEXT: ret i32 [[ELT]]
+;
+ %vec = call <vscale x 2 x i32> @llvm.amdgcn.readfirstlane.nxv2i32(<vscale x 2 x i32> %src)
+ %elt = extractelement <vscale x 2 x i32> %vec, i32 0
+ ret i32 %elt
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane(<2 x i32> %src) {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT: ret i32 [[ELT]]
+;
+ %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src)
+ %elt = extractelement <2 x i32> %vec, i32 0
+ ret i32 %elt
+}
+
+define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(<2 x ptr addrspace(3)> %src) {
+; CHECK-LABEL: define ptr addrspace(3) @extract_elt0_v2p3_readfirstlane(
+; CHECK-SAME: <2 x ptr addrspace(3)> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x ptr addrspace(3)> [[VEC]], i64 0
+; CHECK-NEXT: ret ptr addrspace(3) [[ELT]]
+;
+ %vec = call <2 x ptr addrspace(3)> @llvm.amdgcn.readfirstlane.v2p3(<2 x ptr addrspace(3)> %src)
+ %elt = extractelement <2 x ptr addrspace(3)> %vec, i32 0
+ ret ptr addrspace(3) %elt
+}
+
+define i64 @extract_elt0_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt0_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 0
+; CHECK-NEXT: ret i64 [[ELT]]
+;
+ %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+ %elt = extractelement <2 x i64> %vec, i32 0
+ ret i64 %elt
+}
+
+define i64 @extract_elt1_v2i64_readfirstlane(<2 x i64> %src) {
+; CHECK-LABEL: define i64 @extract_elt1_v2i64_readfirstlane(
+; CHECK-SAME: <2 x i64> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i64> [[VEC]], i64 1
+; CHECK-NEXT: ret i64 [[ELT]]
+;
+ %vec = call <2 x i64> @llvm.amdgcn.readfirstlane.v2i64(<2 x i64> %src)
+ %elt = extractelement <2 x i64> %vec, i32 1
+ ret i64 %elt
+}
+
+define <3 x i16> @extract_elt012_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt012_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x i16> %shuffle
+}
+
+define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <3 x i16> @extract_elt123_v4i16_readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x i16> %shuffle
+}
+
+define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
+;
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x i32> %shuffle
+}
+
+define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
+;
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
+ ret <3 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define <2 x i16> @extract_elt13_v4i16readfirstlane(<4 x i16> %src) {
+; CHECK-LABEL: define <2 x i16> @extract_elt13_v4i16readfirstlane(
+; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
+;
+ %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
+ %shuffle = shufflevector <4 x i16> %vec, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i16> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify0(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+ %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 1
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+ %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(i32 %src0, i32 %src2) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify2(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[INS_0]], <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 0
+ %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 2
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(<2 x i32> %src) convergent {
+; CHECK-LABEL: define i32 @extract_elt0_v2i32_readfirstlane_convergencetoken(
+; CHECK-SAME: <2 x i32> [[SRC:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[SRC]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i32> [[VEC]], i64 0
+; CHECK-NEXT: ret i32 [[ELT]]
+;
+ %t = call token @llvm.experimental.convergence.entry()
+ %vec = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %src) [ "convergencectrl"(token %t) ]
+ %elt = extractelement <2 x i32> %vec, i32 0
+ ret i32 %elt
+}
+
+define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(i32 %src0, i32 %src2) convergent {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergencetoken(
+; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
+; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %t = call token @llvm.experimental.convergence.entry()
+ %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
+ %ins.1 = insertelement <4 x i32> %ins.0, i32 %src0, i32 3
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %ins.1) [ "convergencectrl"(token %t) ]
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define i1 @extract_elt0_v2i1_readfirstlane(<2 x i1> %src) {
+; CHECK-LABEL: define i1 @extract_elt0_v2i1_readfirstlane(
+; CHECK-SAME: <2 x i1> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> [[SRC]])
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <2 x i1> [[VEC]], i64 0
+; CHECK-NEXT: ret i1 [[ELT]]
+;
+ %vec = call <2 x i1> @llvm.amdgcn.readfirstlane.v2i1(<2 x i1> %src)
+ %elt = extractelement <2 x i1> %vec, i32 0
+ ret i1 %elt
+}
+
+define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) {
+; CHECK-LABEL: define <2 x i1> @extract_elt01_v4i1_readfirstlane(
+; CHECK-SAME: <4 x i1> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i1> [[VEC]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: ret <2 x i1> [[SHUFFLE]]
+;
+ %vec = call <4 x i1> @llvm.amdgcn.readfirstlane.v4i1(<4 x i1> %src)
+ %shuffle = shufflevector <4 x i1> %vec, <4 x i1> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i1> %shuffle
+}
+
+define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane(
+; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
+ %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <2 x i32> <i32 1, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) {
+; CHECK-LABEL: define <2 x i32> @extract_elt03_v4i32_readfirstlane(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
+;
+ %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
+ %shuffle = shufflevector <4 x i32> %vec, <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+ ret <2 x i32> %shuffle
+}
+
+define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
+; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane(
+; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]])
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
+; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
+;
+ %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
+ %shuffle = shufflevector <8 x i32> %vec, <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
+ ret <3 x i32> %shuffle
+}
More information about the llvm-commits
mailing list