[llvm] r339025 - AMDGPU: Fold v_lshl_or_b32 with 0 src0
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 6 08:40:20 PDT 2018
Author: arsenm
Date: Mon Aug 6 08:40:20 2018
New Revision: 339025
URL: http://llvm.org/viewvc/llvm-project?rev=339025&view=rev
Log:
AMDGPU: Fold v_lshl_or_b32 with 0 src0
This pattern appears from the expansion of some packed cases.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=339025&r1=339024&r2=339025&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Mon Aug 6 08:40:20 2018
@@ -550,6 +550,19 @@ static bool tryConstantFoldOp(MachineReg
if (!Src0->isImm() && !Src1->isImm())
return false;
+ if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+ if (Src0->isImm() && Src0->getImm() == 0) {
+ // v_lshl_or_b32 0, X, Y -> copy Y
+ // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+ bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
+ MI->RemoveOperand(Src1Idx);
+ MI->RemoveOperand(Src0Idx);
+
+ MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+ return true;
+ }
+ }
+
// and k0, k1 -> v_mov_b32 (k0 & k1)
// or k0, k1 -> v_mov_b32 (k0 | k1)
// xor k0, k1 -> v_mov_b32 (k0 ^ k1)
Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir?rev=339025&r1=339024&r2=339025&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir Mon Aug 6 08:40:20 2018
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
...
# GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
@@ -831,3 +831,75 @@ body: |
S_ENDPGM implicit $vcc
...
+---
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
+# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_reg
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+ %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_imm
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+ %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
+ S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %3
+
+name: constant_fold_lshl_or_reg0_immreg_immreg
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+ %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
+ S_ENDPGM implicit %3
+
+...
More information about the llvm-commits
mailing list