[llvm] [AMDGPU] Hoist readlane/readfirst through unary/binary operands (PR #129037)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 11 10:08:00 PDT 2025


================
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 -passes=instcombine -S < %s | FileCheck %s
+
+; The readfirstlane version of this test covers all the interesting cases of the
+; shared logic. This testcase focuses on readlane-specific pitfalls.
+
+; test unary
+
+define float @hoist_fneg_f32(float %arg, i32 %lane) {
+; CHECK-LABEL: define float @hoist_fneg_f32(
+; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
+; CHECK-NEXT:    [[RFL:%.*]] = fneg float [[TMP0]]
+; CHECK-NEXT:    ret float [[RFL]]
+;
+bb:
+  %val = fneg float %arg
+  %rl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
+  ret float %rl
+}
+
+define double @hoist_fneg_f64(double %arg, i32 %lane) {
+; CHECK-LABEL: define double @hoist_fneg_f64(
+; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
+; CHECK-NEXT:    [[RFL:%.*]] = fneg double [[TMP0]]
+; CHECK-NEXT:    ret double [[RFL]]
+;
+bb:
+  %val = fneg double %arg
+  %rl = call double @llvm.amdgcn.readlane.f64(double %val, i32 %lane)
+  ret double %rl
+}
+
+; test casts
+
+define i32 @hoist_trunc(i64 %arg, i32 %lane) {
+; CHECK-LABEL: define i32 @hoist_trunc(
+; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[RFL:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[RFL]] to i32
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+bb:
+  %val = trunc i64 %arg to i32
+  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
+  ret i32 %rl
+}
+
+define i64 @hoist_zext(i32 %arg, i32 %lane) {
+; CHECK-LABEL: define i64 @hoist_zext(
+; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[RFL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[RFL]] to i64
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
+bb:
+  %val = zext i32 %arg to i64
----------------
shiltian wrote:

I wonder what the benefit of doing so is?

https://github.com/llvm/llvm-project/pull/129037


More information about the llvm-commits mailing list