[llvm] 0d5989b - [AMDGPU] Split R600 and GCN bfe patterns
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 5 01:55:21 PDT 2020
Author: Jay Foad
Date: 2020-10-05T09:55:10+01:00
New Revision: 0d5989bb24934802a9e6fcca63848a57a91efcc8
URL: https://github.com/llvm/llvm-project/commit/0d5989bb24934802a9e6fcca63848a57a91efcc8
DIFF: https://github.com/llvm/llvm-project/commit/0d5989bb24934802a9e6fcca63848a57a91efcc8.diff
LOG: [AMDGPU] Split R600 and GCN bfe patterns
This is in preparation for making the GCN patterns divergence-aware.
NFC (no functional change intended).
Differential Revision: https://reviews.llvm.org/D88579
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/EvergreenInstructions.td
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 23e47c6cc14d..48b82ce395b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -597,53 +597,6 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
(vt rc:$addr)
>;
-// Bitfield extract patterns
-
-def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
- return isMask_32(Imm);
-}]>;
-
-def IMMPopCount : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
-multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
- def : AMDGPUPat <
- (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
- (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
- >;
-
- // x & ((1 << y) - 1)
- def : AMDGPUPat <
- (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x & ~(-1 << y)
- def : AMDGPUPat <
- (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x & (-1 >> (bitwidth - y))
- def : AMDGPUPat <
- (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- // x << (bitwidth - y) >> (bitwidth - y)
- def : AMDGPUPat <
- (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
- (UBFE $src, (MOV (i32 0)), $width)
- >;
-
- def : AMDGPUPat <
- (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
- (SBFE $src, (MOV (i32 0)), $width)
- >;
-}
-
// fshr pattern
class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
(fshr i32:$src0, i32:$src1, i32:$src2),
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index a2782bf8b67d..cd9c056929c5 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,6 +30,15 @@ class EGOrCaymanPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
let SubtargetPredicate = isEGorCayman;
}
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
//===----------------------------------------------------------------------===//
// Evergreen / Cayman store instructions
//===----------------------------------------------------------------------===//
@@ -394,7 +403,41 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
+// Bitfield extract patterns
+
+def : AMDGPUPat <
+ (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+ (BFE_UINT_eg $src, $rshift, (MOV_IMM_I32 (i32 (IMMPopCount $mask))))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+ (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+ (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+ (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+ (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (BFE_UINT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
+
+def : AMDGPUPat <
+ (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (BFE_INT_eg $src, (MOV_IMM_I32 (i32 0)), $width)
+>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 817fa0bf3ac7..d55cf0fc65ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2315,7 +2315,50 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
-defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
+// Bitfield extract patterns
+
+def IMMZeroBasedBitfieldMask : ImmLeaf <i32, [{
+ return isMask_32(Imm);
+}]>;
+
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
+ MVT::i32);
+}]>;
+
+def : AMDGPUPat <
+ (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask),
+ (V_BFE_U32 $src, $rshift, (i32 (IMMPopCount $mask)))
+>;
+
+// x & ((1 << y) - 1)
+def : AMDGPUPat <
+ (and i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
+ (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & ~(-1 << y)
+def : AMDGPUPat <
+ (and i32:$src, (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
+ (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x & (-1 >> (bitwidth - y))
+def : AMDGPUPat <
+ (and i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
+ (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+// x << (bitwidth - y) >> (bitwidth - y)
+def : AMDGPUPat <
+ (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (V_BFE_U32 $src, (i32 0), $width)
+>;
+
+def : AMDGPUPat <
+ (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (V_BFE_I32 $src, (i32 0), $width)
+>;
// SHA-256 Ma patterns
More information about the llvm-commits mailing list