[llvm] 5263bf5 - [AMDGPU][GlobalISel] Legalization of G_ROTL and G_ROTR

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 7 07:38:27 PDT 2021


Author: Mirko Brkusanin
Date: 2021-09-07T16:33:24+02:00
New Revision: 5263bf583a26f3f99c2ac34843e89d210bb4fe26

URL: https://github.com/llvm/llvm-project/commit/5263bf583a26f3f99c2ac34843e89d210bb4fe26
DIFF: https://github.com/llvm/llvm-project/commit/5263bf583a26f3f99c2ac34843e89d210bb4fe26.diff

LOG: [AMDGPU][GlobalISel] Legalization of G_ROTL and G_ROTR

Add an implementation for the legalization of the G_ROTL and G_ROTR machine
instructions. They are very similar to the funnel shift instructions; the
only difference is that funnel shifts take three operands, whereas rotates
take two: the register being rotated and the rotate amount. G_ROTL/G_ROTR
are legalized by lowering them to funnel shift instructions when those are
legal.

Patch by: Mateja Marjanovic

Differential Revision: https://reviews.llvm.org/D105347
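
For illustration only (this snippet is not part of the commit): the lowering
relies on the identities rotl(x, s) == fshl(x, x, s) and, for power-of-two
bit widths where the amount is taken modulo the width, rotl(x, s) ==
fshr(x, x, -s). A minimal standalone C++ sketch of those identities for
32-bit values:

// Standalone sketch, not LLVM code: checks the identities the lowering uses,
//   rotl(x, s) == fshl(x, x, s)  and  rotl(x, s) == fshr(x, x, (unsigned)-s)
// for a power-of-two bit width (32 here), with shift amounts taken modulo
// the bit width.
#include <cassert>
#include <cstdint>

static uint32_t Rotl32(uint32_t X, uint32_t S) {
  S &= 31;
  return (X << S) | (X >> ((32 - S) & 31));
}

// fshl(Hi, Lo, S): view Hi:Lo as a 64-bit value, shift left by S mod 32 and
// keep the high 32 bits.
static uint32_t Fshl32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  uint64_t Concat = (uint64_t(Hi) << 32) | Lo;
  return uint32_t((Concat << (S & 31)) >> 32);
}

// fshr(Hi, Lo, S): same concatenation, shift right by S mod 32 and keep the
// low 32 bits.
static uint32_t Fshr32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  uint64_t Concat = (uint64_t(Hi) << 32) | Lo;
  return uint32_t(Concat >> (S & 31));
}

int main() {
  const uint32_t X = 0x12345678u;
  for (uint32_t S = 0; S < 64; ++S) {
    // Rotate as a funnel shift in the same direction.
    assert(Rotl32(X, S) == Fshl32(X, X, S));
    // Rotate as a funnel shift in the other direction with a negated amount;
    // this mirrors the G_ROTL -> buildNeg + G_FSHR fallback used for
    // power-of-two widths when only the reverse funnel shift is legal.
    assert(Rotl32(X, S) == Fshr32(X, X, 0u - S));
  }
  return 0;
}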

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b652d8e16bcb..069f71b543286 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1537,6 +1537,14 @@ class MachineIRBuilder {
     return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
   }
 
+  /// Build and insert integer negation
+  /// \p Zero = G_CONSTANT 0
+  /// \p Res = G_SUB Zero, \p Op0
+  MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) {
+    auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0);
+    return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0});
+  }
+
   /// Build and insert \p Res = G_CTPOP \p Op0, \p Src0
   MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
     return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});

diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 315c199d72bc4..ca7fe92c069d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4434,6 +4434,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FMAXIMUM:
   case G_FSHL:
   case G_FSHR:
+  case G_ROTL:
+  case G_ROTR:
   case G_FREEZE:
   case G_SADDSAT:
   case G_SSUBSAT:
@@ -6079,6 +6081,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
       isPowerOf2_32(EltSizeInBits))
     return lowerRotateWithReverseRotate(MI);
 
+  // If a funnel shift is supported, use it.
+  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+  bool IsFShLegal = false;
+  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+    auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+                                Register R3) {
+      MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+      MI.eraseFromParent();
+      return Legalized;
+    };
+    if (IsFShLegal) {
+      return buildFunnelShift(FShOpc, Dst, Src, Amt);
+    } else if (isPowerOf2_32(EltSizeInBits)) {
+      // Only the funnel shift in the other direction is legal: negate the
+      // amount and use it instead.
+      Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+      return buildFunnelShift(RevFsh, Dst, Src, Amt);
+    }
+  }
+
   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 2efada6758cc0..8341bf93ecbbe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1626,6 +1626,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .clampScalar(0, S32, S64)
     .lower();
 
+  getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+    .scalarize(0)
+    .lower();
+
   // TODO: Only Try to form v2s16 with legal packed instructions.
   getActionDefinitionsBuilder(G_FSHR)
     .legalFor({{S32, S32}})

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
new file mode 100644
index 0000000000000..f8f6b99ffce8c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-rotl-rotr.mir
@@ -0,0 +1,462 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX6
+# RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=GFX,GFX8
+
+---
+name:            rotl_i15
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotl_i15
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
+    ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+    ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
+    ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+    ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+    ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+    ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+    ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+    ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+    ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
+    ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
+    ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+    ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
+    ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+    ; GFX6: $sgpr0 = COPY [[OR]](s32)
+    ; GFX8-LABEL: name: rotl_i15
+    ; GFX8: liveins: $sgpr0, $sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
+    ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
+    ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+    ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]]
+    ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[COPY2]]
+    ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+    ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+    ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+    ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+    ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+    ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C1]]
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 14
+    ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SELECT1]](s32)
+    ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C5]], [[TRUNC2]]
+    ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; GFX8: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767
+    ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]]
+    ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND2]], [[TRUNC3]](s16)
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB4]](s16)
+    ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+    ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C6]]
+    ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32)
+    ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16)
+    ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
+    ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]]
+    ; GFX8: $sgpr0 = COPY [[OR]](s32)
+    %2:_(s32) = COPY $sgpr0
+    %0:_(s15) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $sgpr1
+    %1:_(s15) = G_TRUNC %3(s32)
+    %5:_(s15) = G_ROTL %0, %1(s15)
+    %4:_(s32) = G_ANYEXT %5(s15)
+    $sgpr0 = COPY %4
+
+...
+---
+name:            rotl_i16
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotl_i16
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32)
+    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32)
+    ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8-LABEL: name: rotl_i16
+    ; GFX8: liveins: $sgpr0, $sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s16)
+    ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32)
+    %2:_(s32) = COPY $sgpr0
+    %0:_(s16) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $sgpr1
+    %1:_(s16) = G_TRUNC %3(s32)
+    %5:_(s16) = G_ROTL %0, %1(s16)
+    %4:_(s32) = G_ANYEXT %5(s16)
+    $sgpr0 = COPY %4
+
+...
+---
+name:            rotl_i32
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotl_i32
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY1]]
+    ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[SUB]](s32)
+    ; GFX: $sgpr0 = COPY [[FSHR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_ROTL %0, %1(s32)
+    $sgpr0 = COPY %2
+
+...
+---
+name:            rotl_i31
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotl_i31
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+    ; GFX: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; GFX: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
+    ; GFX: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; GFX: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; GFX: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY2]](s32)
+    ; GFX: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32)
+    ; GFX: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000
+    ; GFX: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]]
+    ; GFX: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32)
+    ; GFX: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY2]]
+    ; GFX: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]]
+    ; GFX: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]]
+    ; GFX: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]]
+    ; GFX: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]]
+    ; GFX: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY2]]
+    ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]]
+    ; GFX: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; GFX: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY2]]
+    ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY2]]
+    ; GFX: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]]
+    ; GFX: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY2]]
+    ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY2]]
+    ; GFX: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]]
+    ; GFX: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]]
+    ; GFX: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32)
+    ; GFX: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]]
+    ; GFX: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; GFX: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+    ; GFX: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY3]](s32)
+    ; GFX: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]]
+    ; GFX: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
+    ; GFX: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32)
+    ; GFX: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]]
+    ; GFX: $sgpr0 = COPY [[OR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s31) = G_TRUNC %0(s32)
+    %3:_(s31) = G_TRUNC %1(s32)
+    %4:_(s31) = G_ROTL %2, %3(s31)
+    %5:_(s32) = G_ANYEXT %4(s31)
+    $sgpr0 = COPY %5
+
+...
+---
+name:            rotl_i64
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    ; GFX-LABEL: name: rotl_i64
+    ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+    ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+    ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+    ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+    ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32)
+    ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+    ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+    ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC1]](s32)
+    ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]]
+    ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_ROTL %0, %1(s64)
+    $sgpr0_sgpr1 = COPY %2
+
+...
+---
+name:            rotl_v4i32
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    ; GFX-LABEL: name: rotl_v4i32
+    ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV4]]
+    ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[SUB]](s32)
+    ; GFX: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV5]]
+    ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[SUB1]](s32)
+    ; GFX: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV6]]
+    ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[SUB2]](s32)
+    ; GFX: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C]], [[UV7]]
+    ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[SUB3]](s32)
+    ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32)
+    ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %2:_(<4 x s32>) = G_ROTL %0, %1(<4 x s32>)
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2
+
+...
+---
+name:            rotr_i16
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX6-LABEL: name: rotr_i16
+    ; GFX6: liveins: $sgpr0, $sgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]]
+    ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SUB]](s32)
+    ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16)
+    ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]]
+    ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[ZEXT]](s32)
+    ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND2]](s16)
+    ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT1]](s32)
+    ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]]
+    ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX6: $sgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX8-LABEL: name: rotr_i16
+    ; GFX8: liveins: $sgpr0, $sgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+    ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+    ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[TRUNC1]]
+    ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]]
+    ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16)
+    ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[SUB]], [[C1]]
+    ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND1]](s16)
+    ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[LSHR]], [[SHL]]
+    ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX8: $sgpr0 = COPY [[ANYEXT]](s32)
+    %2:_(s32) = COPY $sgpr0
+    %0:_(s16) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $sgpr1
+    %1:_(s16) = G_TRUNC %3(s32)
+    %5:_(s16) = G_ROTR %0, %1(s16)
+    %4:_(s32) = G_ANYEXT %5(s16)
+    $sgpr0 = COPY %4
+
+...
+---
+name:            rotr_i32
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GFX-LABEL: name: rotr_i32
+    ; GFX: liveins: $sgpr0, $sgpr1
+    ; GFX: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; GFX: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+    ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY]], [[COPY1]](s32)
+    ; GFX: $sgpr0 = COPY [[FSHR]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_ROTR %0, %1(s32)
+    $sgpr0 = COPY %2
+
+...
+---
+name:            rotr_i64
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    ; GFX-LABEL: name: rotr_i64
+    ; GFX: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GFX: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+    ; GFX: [[COPY1:%[0-9]+]]:_(s64) = COPY $sgpr2_sgpr3
+    ; GFX: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; GFX: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+    ; GFX: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; GFX: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]]
+    ; GFX: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]]
+    ; GFX: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32)
+    ; GFX: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C1]]
+    ; GFX: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
+    ; GFX: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY]], [[TRUNC]](s32)
+    ; GFX: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C1]]
+    ; GFX: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+    ; GFX: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC1]](s32)
+    ; GFX: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+    ; GFX: $sgpr0_sgpr1 = COPY [[OR]](s64)
+    %0:_(s64) = COPY $sgpr0_sgpr1
+    %1:_(s64) = COPY $sgpr2_sgpr3
+    %2:_(s64) = G_ROTR %0, %1(s64)
+    $sgpr0_sgpr1 = COPY %2
+
+...
+---
+name:            rotr_v4i32
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    ; GFX-LABEL: name: rotr_v4i32
+    ; GFX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GFX: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    ; GFX: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GFX: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+    ; GFX: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV]], [[UV4]](s32)
+    ; GFX: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV1]], [[UV5]](s32)
+    ; GFX: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV2]], [[UV2]], [[UV6]](s32)
+    ; GFX: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[UV3]], [[UV3]], [[UV7]](s32)
+    ; GFX: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32), [[FSHR2]](s32), [[FSHR3]](s32)
+    ; GFX: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %2:_(<4 x s32>) = G_ROTR %0, %1(<4 x s32>)
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %2
+
+...


        

