[llvm] r363587 - AMDGPU: Fold readlane/readfirstlane calls

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 10:52:35 PDT 2019


Author: arsenm
Date: Mon Jun 17 10:52:35 2019
New Revision: 363587

URL: http://llvm.org/viewvc/llvm-project?rev=363587&view=rev
Log:
AMDGPU: Fold readlane/readfirstlane calls

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=363587&r1=363586&r2=363587&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Jun 17 10:52:35 2019
@@ -3781,6 +3781,30 @@ Instruction *InstCombiner::visitCallInst
     // A constant value is trivially uniform.
     if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
       return replaceInstUsesWith(*II, C);
+
+    // The rest of these may not be safe if the exec may not be the same between
+    // the def and use.
+    Value *Src = II->getArgOperand(0);
+    Instruction *SrcInst = dyn_cast<Instruction>(Src);
+    if (SrcInst && SrcInst->getParent() != II->getParent())
+      break;
+
+    // readfirstlane (readfirstlane x) -> readfirstlane x
+    // readlane (readfirstlane x), y -> readfirstlane x
+    if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+      return replaceInstUsesWith(*II, Src);
+
+    if (IID == Intrinsic::amdgcn_readfirstlane) {
+      // readfirstlane (readlane x, y) -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+        return replaceInstUsesWith(*II, Src);
+    } else {
+      // readlane (readlane x, y), y -> readlane x, y
+      if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+                  m_Value(), m_Specific(II->getArgOperand(1)))))
+        return replaceInstUsesWith(*II, Src);
+    }
+
     break;
   }
   case Intrinsic::stackrestore: {

Modified: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll?rev=363587&r1=363586&r2=363587&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll Mon Jun 17 10:52:35 2019
@@ -2462,6 +2462,63 @@ define amdgpu_kernel void @readfirstlane
   ret void
 }
 
+define i32 @readfirstlane_idempotent(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  %read2 = call i32 @llvm.amdgcn.readfirstlane(i32 %read1)
+  ret i32 %read2
+}
+
+define i32 @readfirstlane_readlane(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
+; CHECK-NEXT:    ret i32 [[READ0]]
+; Same-block fold: readfirstlane (readlane x, y) -> readlane x, y
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
+define i32 @readfirstlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readfirstlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
+define i32 @readfirstlane_readlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_readlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 0)
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[READ0]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 0)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readfirstlane(i32 %read0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.readlane
 ; --------------------------------------------------------------------
@@ -2491,6 +2548,74 @@ define amdgpu_kernel void @readlane_cons
   ret void
 }
 
+define i32 @readlane_idempotent(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+  ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_lanes(i32 %arg, i32 %lane0, i32 %lane1) {
+; CHECK-LABEL: @readlane_idempotent_different_lanes(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE0:%.*]])
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE1:%.*]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane0)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane1)
+  ret i32 %read1
+}
+
+define i32 @readlane_readfirstlane(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane(
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    ret i32 [[READ0]]
+;
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+  ret i32 %read1
+}
+
+define i32 @readlane_idempotent_different_block(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_idempotent_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 [[LANE:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 [[LANE]])
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 %lane)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 %lane)
+  ret i32 %read1
+}
+
+
+define i32 @readlane_readfirstlane_different_block(i32 %arg) {
+; CHECK-LABEL: @readlane_readfirstlane_different_block(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[READ0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[READ1:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[READ0]], i32 0)
+; CHECK-NEXT:    ret i32 [[READ1]]
+;
+bb0:
+  %read0 = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  br label %bb1
+
+bb1:
+  %read1 = call i32 @llvm.amdgcn.readlane(i32 %read0, i32 0)
+  ret i32 %read1
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.update.dpp.i32
 ; --------------------------------------------------------------------




More information about the llvm-commits mailing list