[llvm] 5263bf5 - [AMDGPU][GlobalISel] Legalization of G_ROTL and G_ROTR
Mirko Brkusanin via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 7 07:38:27 PDT 2021
Author: Mirko Brkusanin
Date: 2021-09-07T16:33:24+02:00
New Revision: 5263bf583a26f3f99c2ac34843e89d210bb4fe26
URL: https://github.com/llvm/llvm-project/commit/5263bf583a26f3f99c2ac34843e89d210bb4fe26
DIFF: https://github.com/llvm/llvm-project/commit/5263bf583a26f3f99c2ac34843e89d210bb4fe26.diff
LOG: [AMDGPU][GlobalISel] Legalization of G_ROTL and G_ROTR
Add an implementation for the legalization of the G_ROTL and G_ROTR machine
instructions. They are very similar to funnel shift instructions; the only
difference is that funnel shifts have three operands, whereas rotate
instructions have two: the first is the register being rotated and the second
is the rotation amount. G_ROTL/G_ROTR are legalized simply by lowering them
into funnel shift instructions when those are legal.
Patch by: Mateja Marjanovic
Differential Revision: https://reviews.llvm.org/D105347
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b652d8e16bcb..069f71b543286 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1537,6 +1537,14 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
}
+ /// Build and insert integer negation of \p Src0 into \p Dst
+ /// Zero = G_CONSTANT 0 (built with the LLT of \p Dst)
+ /// \p Dst = G_SUB Zero, \p Src0
+ MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) {
+ auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0);
+ return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0});
+ }
+
/// Build and insert \p Res = G_CTPOP \p Op0, \p Src0
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 315c199d72bc4..ca7fe92c069d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4434,6 +4434,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
@@ -6079,6 +6081,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
isPowerOf2_32(EltSizeInBits))
return lowerRotateWithReverseRotate(MI);
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // Prefer the same-direction funnel shift; else negate Amt and use the reverse.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 2efada6758cc0..8341bf93ecbbe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1626,6 +1626,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S32, S64)
.lower();
+ getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+ .scalarize(0)
+ .lower();
+
// TODO: Only Try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
new file mode 100644
index 0000000000000..f8f6b99ffce8c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
@@ -0,0 +1,462 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX6
+# RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX8
+
+---
+name: rotl_i15
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX6-LABEL: name: rotl_i15
+ ; GFX6: liveins: $sgpr0, $sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
+ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+ ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+ ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
+ ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+ ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+ ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+ ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+ ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
+ ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
+ ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+ ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+ ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
+ ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+ ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; GFX6: $sgpr0 = COPY [[OR]](s32)
+ ; GFX8-LABEL: name: rotl_i15
+ ; GFX8: liveins: $sgpr0, $sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
+ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+ ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+ ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+ ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+ ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+ ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]]
+ ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+ ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+ ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]]
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+ ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+ ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+ ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
+ ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC2]]
+ ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; GFX8: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+ ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]]
+ ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[TRUNC3]](s16)
+ ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16)
+ ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+ ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C6]]
+ ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32)
+ ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16)
+ ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
+ ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]]
+ ; GFX8: $sgpr0 = COPY [[OR]](s32)
+ %2:_(s32) = COPY $sgpr0
+ %0:_(s15) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $sgpr1
+ %1:_(s15) = G_TRUNC %3(s32)
+ %5:_(s15) = G_ROTL %0, %1(s15)
+ %4:_(s32) = G_ANYEXT %5(s15)
+ $sgpr0 = COPY %4
+
+...
+---
+name: rotl_i16
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX6-LABEL: name: rotl_i16
+ ; GFX6: liveins: $sgpr0, $sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+ ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+ ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32)
+ ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+ ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+ ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+ ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+ ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX8-LABEL: name: rotl_i16
+ ; GFX8: liveins: $sgpr0, $sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+ ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16)
+ ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]]
+ ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32)
+ %2:_(s32) = COPY $sgpr0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $sgpr1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %5:_(s16) = G_ROTL %0, %1(s16)
+ %4:_(s32) = G_ANYEXT %5(s16)
+ $sgpr0 = COPY %4
+
+...
+---
+name: rotl_i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX-LABEL: name: rotl_i32
+ ; GFX: liveins: $sgpr0, $sgpr1
+ ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+ ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[SUB]](s32)
+ ; GFX: $sgpr0 = COPY [[FSHR]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ROTL %0, %1(s32)
+ $sgpr0 = COPY %2
+
+...
+---
+name: rotl_i31
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX-LABEL: name: rotl_i31
+ ; GFX: liveins: $sgpr0, $sgpr1
+ ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; GFX: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+ ; GFX: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+ ; GFX: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; GFX: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+ ; GFX: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+ ; GFX: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+ ; GFX: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
+ ; GFX: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+ ; GFX: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+ ; GFX: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+ ; GFX: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+ ; GFX: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+ ; GFX: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+ ; GFX: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+ ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+ ; GFX: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+ ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+ ; GFX: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+ ; GFX: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+ ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+ ; GFX: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+ ; GFX: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
+ ; GFX: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
+ ; GFX: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
+ ; GFX: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+ ; GFX: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+ ; GFX: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+ ; GFX: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
+ ; GFX: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+ ; GFX: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+ ; GFX: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+ ; GFX: $sgpr0 = COPY [[OR]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s31) = G_TRUNC %0(s32)
+ %3:_(s31) = G_TRUNC %1(s32)
+ %4:_(s31) = G_ROTL %2, %3(s31)
+ %5:_(s32) = G_ANYEXT %4(s31)
+ $sgpr0 = COPY %5
+
+...
+---
+name: rotl_i64
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ ; GFX-LABEL: name: rotl_i64
+ ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+ ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+ ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+ ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+ ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+ ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+ ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+ ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+ ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32)
+ ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+ ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+ ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32)
+ ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]]
+ ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ROTL %0, %1(s64)
+ $sgpr0_sgpr1 = COPY %2
+
+...
+---
+name: rotl_v4i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+ ; GFX-LABEL: name: rotl_v4i32
+ ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+ ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV4]]
+ ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[SUB]](s32)
+ ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV5]]
+ ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[SUB1]](s32)
+ ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV6]]
+ ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[SUB2]](s32)
+ ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV7]]
+ ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[SUB3]](s32)
+ ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32)
+ ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+ %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>)
+ $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2
+
+...
+---
+name: rotr_i16
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX6-LABEL: name: rotr_i16
+ ; GFX6: liveins: $sgpr0, $sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+ ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+ ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+ ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+ ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+ ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[ZEXT]](s32)
+ ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+ ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16)
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT1]](s32)
+ ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+ ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX8-LABEL: name: rotr_i16
+ ; GFX8: liveins: $sgpr0, $sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+ ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+ ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+ ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16)
+ ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+ ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16)
+ ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]]
+ ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+ ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32)
+ %2:_(s32) = COPY $sgpr0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $sgpr1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %5:_(s16) = G_ROTR %0, %1(s16)
+ %4:_(s32) = G_ANYEXT %5(s16)
+ $sgpr0 = COPY %4
+
+...
+---
+name: rotr_i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; GFX-LABEL: name: rotr_i32
+ ; GFX: liveins: $sgpr0, $sgpr1
+ ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](s32)
+ ; GFX: $sgpr0 = COPY [[FSHR]](s32)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ROTR %0, %1(s32)
+ $sgpr0 = COPY %2
+
+...
+---
+name: rotr_i64
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ ; GFX-LABEL: name: rotr_i64
+ ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+ ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+ ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+ ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+ ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+ ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+ ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+ ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+ ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32)
+ ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+ ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+ ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC1]](s32)
+ ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+ ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ROTR %0, %1(s64)
+ $sgpr0_sgpr1 = COPY %2
+
+...
+---
+name: rotr_v4i32
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+ ; GFX-LABEL: name: rotr_v4i32
+ ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+ ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+ ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+ ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[UV4]](s32)
+ ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[UV5]](s32)
+ ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[UV6]](s32)
+ ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[UV7]](s32)
+ ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32)
+ ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+ %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>)
+ $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2
+
+...
More information about the llvm-commits
mailing list