[llvm] r266506 - AMDGPU: Use s_addk_i32 / s_mulk_i32
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 15 18:46:50 PDT 2016
Author: arsenm
Date: Fri Apr 15 20:46:49 2016
New Revision: 266506
URL: http://llvm.org/viewvc/llvm-project?rev=266506&view=rev
Log:
AMDGPU: Use s_addk_i32 / s_mulk_i32
Added:
llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll
llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=266506&r1=266505&r2=266506&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Fri Apr 15 20:46:49 2016
@@ -198,6 +198,10 @@ static MachineOperand copyRegOperandAsIm
Orig.isInternalRead());
}
+static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+ return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -214,18 +218,6 @@ bool SIShrinkInstructions::runOnMachineF
Next = std::next(I);
MachineInstr &MI = *I;
- // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
- if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
- const MachineOperand &Src = MI.getOperand(1);
-
- if (Src.isImm()) {
- if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
- MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
- }
-
- continue;
- }
-
if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
// If this has a literal constant source that is the same as the
// reversed bits of an inline immediate, replace with a bitreverse of
@@ -250,6 +242,47 @@ bool SIShrinkInstructions::runOnMachineF
}
}
+ // FIXME: We also need to consider movs of constant operands since
+ // immediate operands are not folded if they have more than one use, and
+ // the operand folding pass is unaware if the immediate will be free since
+ // it won't know if the src == dest constraint will end up being
+ // satisfied.
+ if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
+ MI.getOpcode() == AMDGPU::S_MUL_I32) {
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src0 = MI.getOperand(1);
+ const MachineOperand &Src1 = MI.getOperand(2);
+
+ // FIXME: This could work better if hints worked with subregisters. If
+ // we have a vector add of a constant, we usually don't get the correct
+ // allocation due to the subregister usage.
+ if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) &&
+ Src0.isReg()) {
+ MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg());
+ continue;
+ }
+
+ if (Src0.isReg() && Src0.getReg() == Dest.getReg()) {
+ if (Src1.isImm() && isKImmOperand(TII, Src1)) {
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
+ AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
+
+ MI.setDesc(TII->get(Opc));
+ MI.tieOperands(0, 1);
+ }
+ }
+ }
+
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Src.isImm() && isKImmOperand(TII, Src))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
Modified: llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll?rev=266506&r1=266505&r2=266506&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fceil64.ll Fri Apr 15 20:46:49 2016
@@ -13,8 +13,8 @@ declare <16 x double> @llvm.ceil.v16f64(
; CI: v_ceil_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01
-; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]]
+; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01
+; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]]
; SI-DAG: s_not_b64
; SI-DAG: s_and_b64
; SI-DAG: cmp_gt_i32
Modified: llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll?rev=266506&r1=266505&r2=266506&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ftrunc.f64.ll Fri Apr 15 20:46:49 2016
@@ -25,8 +25,8 @@ define void @v_ftrunc_f64(double addrspa
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01
-; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]]
+; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01
+; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]]
; SI-DAG: s_not_b64
; SI-DAG: s_and_b64
; SI-DAG: cmp_gt_i32
Added: llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll?rev=266506&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_addk_i32.ll Fri Apr 15 20:46:49 2016
@@ -0,0 +1,93 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}s_addk_i32_k0:
+; SI: s_load_dword [[VAL:s[0-9]+]]
+; SI: s_addk_i32 [[VAL]], 0x41
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VRESULT]]
+; SI: s_endpgm
+define void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+ %add = add i32 %b, 65
+ store i32 %add, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This should be folded with any number of uses.
+; SI-LABEL: {{^}}s_addk_i32_k0_x2:
+; SI: s_movk_i32 [[K:s[0-9]+]], 0x41
+; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
+; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]]
+; SI: s_endpgm
+define void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) {
+ %add0 = add i32 %a, 65
+ %add1 = add i32 %b, 65
+ store i32 %add0, i32 addrspace(1)* %out0
+ store i32 %add1, i32 addrspace(1)* %out1
+ ret void
+}
+
+; SI-LABEL: {{^}}s_addk_i32_k1:
+; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}}
+; SI: s_endpgm
+define void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
+ %add = add i32 %b, 32767 ; (1 << 15) - 1
+ store i32 %add, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_addk_i32_k2:
+; SI: s_addk_i32 {{s[0-9]+}}, 0xffef{{$}}
+; SI: s_endpgm
+define void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
+ %add = add i32 %b, -17
+ store i32 %add, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_addk_v2i32_k0:
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
+; SI: s_endpgm
+define void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, <2 x i32> %b) {
+ %add = add <2 x i32> %b, <i32 65, i32 66>
+ store <2 x i32> %add, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_addk_v4i32_k0:
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44
+; SI: s_endpgm
+define void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) {
+ %add = add <4 x i32> %b, <i32 65, i32 66, i32 67, i32 68>
+ store <4 x i32> %add, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_addk_v8i32_k0:
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x45
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x46
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x47
+; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x48
+; SI: s_endpgm
+define void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) {
+ %add = add <8 x i32> %b, <i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72>
+ store <8 x i32> %add, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}no_s_addk_i32_k0:
+; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8000{{$}}
+; SI: s_endpgm
+define void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+ %add = add i32 %b, 32768 ; 1 << 15
+ store i32 %add, i32 addrspace(1)* %out
+ ret void
+}
Added: llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll?rev=266506&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/s_mulk_i32.ll Fri Apr 15 20:46:49 2016
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}s_mulk_i32_k0:
+; SI: s_load_dword [[VAL:s[0-9]+]]
+; SI: s_mulk_i32 [[VAL]], 0x41
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VRESULT]]
+; SI: s_endpgm
+define void @s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+ %mul = mul i32 %b, 65
+ store i32 %mul, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_mulk_i32_k1:
+; SI: s_mulk_i32 {{s[0-9]+}}, 0x7fff{{$}}
+; SI: s_endpgm
+define void @s_mulk_i32_k1(i32 addrspace(1)* %out, i32 %b) {
+ %mul = mul i32 %b, 32767 ; (1 << 15) - 1
+ store i32 %mul, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_mulk_i32_k2:
+; SI: s_mulk_i32 {{s[0-9]+}}, 0xffef{{$}}
+; SI: s_endpgm
+define void @s_mulk_i32_k2(i32 addrspace(1)* %out, i32 %b) {
+ %mul = mul i32 %b, -17
+ store i32 %mul, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}no_s_mulk_i32_k0:
+; SI: s_mul_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8001{{$}}
+; SI: s_endpgm
+define void @no_s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) {
+ %mul = mul i32 %b, 32769 ; (1 << 15) + 1
+ store i32 %mul, i32 addrspace(1)* %out
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll?rev=266506&r1=266505&r2=266506&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll Fri Apr 15 20:46:49 2016
@@ -74,8 +74,8 @@ define void @test_add_shl_add_constant(i
; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]]
-; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: s_addk_i32 [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
More information about the llvm-commits mailing list