[llvm] r339025 - AMDGPU: Fold v_lshl_or_b32 with 0 src0

Mon Aug 6 08:40:20 PDT 2018

Author: arsenm
Date: Mon Aug  6 08:40:20 2018
New Revision: 339025

URL: http://llvm.org/viewvc/llvm-project?rev=339025&view=rev
Log:
AMDGPU: Fold v_lshl_or_b32 with 0 src0

This pattern appears from the expansion of some packed cases.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir

Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=339025&r1=339024&r2=339025&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Mon Aug  6 08:40:20 2018
@@ -550,6 +550,19 @@ static bool tryConstantFoldOp(MachineReg
   if (!Src0->isImm() && !Src1->isImm())
     return false;
 
+  if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) {
+    if (Src0->isImm() && Src0->getImm() == 0) {
+      // v_lshl_or_b32 0, X, Y -> copy Y
+      // v_lshl_or_b32 0, X, K -> v_mov_b32 K
+      bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg();
+      MI->RemoveOperand(Src1Idx);
+      MI->RemoveOperand(Src0Idx);
+
+      MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32));
+      return true;
+    }
+  }
+
   // and k0, k1 -> v_mov_b32 (k0 & k1)
   // or k0, k1 -> v_mov_b32 (k0 | k1)
   // xor k0, k1 -> v_mov_b32 (k0 ^ k1)

Modified: llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir?rev=339025&r1=339024&r2=339025&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir Mon Aug  6 08:40:20 2018
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination -o - %s | FileCheck -check-prefix=GCN %s
 ...
 
 # GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}}
@@ -831,3 +831,75 @@ body:             |
     S_ENDPGM implicit $vcc
 
 ...
+---
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_reg{{$}}
+# GCN: %2:vgpr_32 = COPY $vgpr0, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_reg
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_LSHL_OR_B32 %0,%1, $vgpr0, implicit $exec
+  S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_imm{{$}}
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %2
+
+name: constant_fold_lshl_or_reg0_immreg_imm
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_LSHL_OR_B32 %0, %1, 10, implicit $exec
+  S_ENDPGM implicit %2
+
+...
+
+---
+
+# GCN-LABEL: name: constant_fold_lshl_or_reg0_immreg_immreg{{$}}
+# GCN: %3:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %3
+
+name: constant_fold_lshl_or_reg0_immreg_immreg
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+
+  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  %1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+  %2:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
+  %3:vgpr_32 = V_LSHL_OR_B32 %0, %1, %2, implicit $exec
+  S_ENDPGM implicit %3
+
+...