[llvm] r259180 - AMDGPU: Remove 24-bit intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 29 02:05:18 PST 2016
Author: arsenm
Date: Fri Jan 29 04:05:16 2016
New Revision: 259180
URL: http://llvm.org/viewvc/llvm-project?rev=259180&view=rev
Log:
AMDGPU: Remove 24-bit intrinsics
The known bit matching code seems to work reasonably well,
so these shouldn't really be needed.
Removed:
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
llvm/trunk/lib/Target/AMDGPU/CaymanInstructions.td
llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll
llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 29 04:05:16 2016
@@ -926,22 +926,6 @@ SDValue AMDGPUTargetLowering::LowerINTRI
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_umul24:
- return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_imul24:
- return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
-
- case AMDGPUIntrinsic::AMDGPU_umad24:
- return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_imad24:
- return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Fri Jan 29 04:05:16 2016
@@ -597,30 +597,6 @@ class UMad24Pat<Instruction Inst> : Pat
(Inst $src0, $src1, $src2)
>;
-multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
- def _expand_imad24 : Pat <
- (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
- (AddInst (MulInst $src0, $src1), $src2)
- >;
-
- def _expand_imul24 : Pat <
- (AMDGPUmul_i24 i32:$src0, i32:$src1),
- (MulInst $src0, $src1)
- >;
-}
-
-multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
- def _expand_umad24 : Pat <
- (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
- (AddInst (MulInst $src0, $src1), $src2)
- >;
-
- def _expand_umul24 : Pat <
- (AMDGPUmul_u24 i32:$src0, i32:$src1),
- (MulInst $src0, $src1)
- >;
-}
-
class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
(fdiv FP_ONE, vt:$src),
(RcpInst $src)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td Fri Jan 29 04:05:16 2016
@@ -16,10 +16,6 @@ let TargetPrefix = "AMDGPU", isTarget =
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
- def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
Modified: llvm/trunk/lib/Target/AMDGPU/CaymanInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CaymanInstructions.td?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CaymanInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/CaymanInstructions.td Fri Jan 29 04:05:16 2016
@@ -51,7 +51,6 @@ def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
-defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>;
// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
Modified: llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td Fri Jan 29 04:05:16 2016
@@ -85,8 +85,6 @@ def COS_eg : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
-defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
-
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Fri Jan 29 04:05:16 2016
@@ -1719,12 +1719,6 @@ def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
-let Predicates = [isR600] in {
-// Intrinsic patterns
-defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>;
-defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>;
-} // End isR600
-
def getLDSNoRetOp : InstrMapping {
let FilterClass = "R600_LDS_1A1D";
let RowFields = ["BaseOp"];
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll?rev=259179&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imad24.ll (removed)
@@ -1,22 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-; FIXME: Store of i32 seems to be broken pre-EG somehow?
-
-declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}test_imad24:
-; SI: v_mad_i32_i24
-; CM: MULADD_INT24
-; R600: MULLO_INT
-; R600: ADD_INT
-define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
- %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
- store i32 %mad, i32 addrspace(1)* %out, align 4
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll?rev=259179&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.imul24.ll (removed)
@@ -1,16 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}test_imul24:
-; SI: v_mul_i32_i24
-; CM: MUL_INT24
-; R600: MULLO_INT
-define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
- %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
- store i32 %mul, i32 addrspace(1)* %out, align 4
- ret void
-}
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll?rev=259179&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umad24.ll (removed)
@@ -1,38 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
-; FUNC-LABEL: {{^}}test_umad24:
-; SI: v_mad_u32_u24
-; EG: MULADD_UINT24
-; R600: MULLO_UINT
-; R600: ADD_INT
-define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
- %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
- store i32 %mad, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}commute_umad24:
-; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]]
-; SI: buffer_store_dword [[RESULT]]
-define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1
-
- %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4
- %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4
- %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
- store i32 %mad, i32 addrspace(1)* %out.gep, align 4
- ret void
-}
-
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll?rev=259179&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.AMDGPU.umul24.ll (removed)
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
-declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}test_umul24:
-; SI: v_mul_u32_u24
-; R600: MUL_UINT24
-; R600: MULLO_UINT
-define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
- %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone
- store i32 %mul, i32 addrspace(1)* %out, align 4
- ret void
-}
Modified: llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/mad_int24.ll Fri Jan 29 04:05:16 2016
@@ -3,8 +3,6 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
-declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
-
; FUNC-LABEL: {{^}}i32_mad24:
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
; EG: MULLO_INT
@@ -24,12 +22,3 @@ entry:
store i32 %3, i32 addrspace(1)* %out
ret void
}
-
-; FUNC-LABEL: @test_imul24
-; SI: v_mad_i32_i24
-define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
- %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
- %add = add i32 %mul, %src2
- store i32 %add, i32 addrspace(1)* %out, align 4
- ret void
-}
Modified: llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll?rev=259180&r1=259179&r2=259180&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll Fri Jan 29 04:05:16 2016
@@ -4,7 +4,7 @@
declare float @llvm.fma.f32(float, float, float) #1
declare double @llvm.fma.f64(double, double, double) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
-declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
+declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1
; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
@@ -118,11 +118,11 @@ define void @test_sgpr_use_twice_ternary
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
; GCN: s_load_dword [[SGPR:s[0-9]+]]
-; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
; GCN: buffer_store_dword [[RESULT]]
-define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
- %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
- store i32 %fma, i32 addrspace(1)* %out, align 4
+define void @test_sgpr_use_twice_ternary_op_imm_a_a(float addrspace(1)* %out, float %a) #0 {
+ %val = call float @llvm.amdgcn.div.fixup.f32(float 2.0, float %a, float %a) #1
+ store float %val, float addrspace(1)* %out, align 4
ret void
}
More information about the llvm-commits
mailing list