[PATCH] D59501: [AMDGPU] Enable code selection using `s_mul_hi_u32`/`s_mul_hi_i32`.
Michael Liao via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 18 11:20:39 PDT 2019
hliao created this revision.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D59501
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/mul.ll
Index: llvm/test/CodeGen/AMDGPU/mul.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mul.ll
+++ llvm/test/CodeGen/AMDGPU/mul.ll
@@ -1,5 +1,6 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,FUNC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=EG,FUNC %s
; mul24 and mad24 are affected
@@ -139,6 +140,8 @@
; crash with a 'failed to select' error.
; FUNC-LABEL: {{^}}s_mul_i64:
+; GFX9: s_mul_hi_u32
+; GFX9: s_endpgm
define amdgpu_kernel void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%mul = mul i64 %a, %b
store i64 %mul, i64 addrspace(1)* %out, align 8
Index: llvm/lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -548,8 +548,12 @@
def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">;
} // End Defs = [SCC]
- def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
- def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
+ let isCommutable = 1 in {
+ def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32",
+ [(set i32:$sdst, (UniformBinFrag<mulhu> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32",
+ [(set i32:$sdst, (UniformBinFrag<mulhs> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ }
}
//===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3193,6 +3193,8 @@
return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
+ case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
+ case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
@@ -3237,6 +3239,8 @@
case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
}
+ llvm_unreachable(
+ "Unexpected scalar opcode without corresponding vector one!");
}
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59501.191131.patch
Type: text/x-patch
Size: 3035 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190318/db82f4dc/attachment.bin>
More information about the llvm-commits mailing list