[llvm] 7345753 - [AArch64][GlobalISel] Use custom legalization for G_TRUNC for v8i8 vectors.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 1 13:30:02 PDT 2020


Author: Amara Emerson
Date: 2020-10-01T13:22:00-07:00
New Revision: 73457536ff335a2cbe2381354512e0fcf9d703fd

URL: https://github.com/llvm/llvm-project/commit/73457536ff335a2cbe2381354512e0fcf9d703fd
DIFF: https://github.com/llvm/llvm-project/commit/73457536ff335a2cbe2381354512e0fcf9d703fd.diff

LOG: [AArch64][GlobalISel] Use custom legalization for G_TRUNC for v8i8 vectors.

Truncating to v8i8 is a case where we want to split the source but also generate
intermediate truncates to reduce the size of the source vector before truncating
down to v8i8. This implements the same strategy as what SelectionDAG does, but
I'm not certain where if anywhere in generic code it should live.

Use it for legalization of v8s8 = G_ICMP v8s32.

Differential Revision: https://reviews.llvm.org/D88191

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3311cc37f176..ffa49ad15b4c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
 #include <initializer_list>
+#include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "aarch64-legalinfo"
 
@@ -373,7 +374,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
           s64)
-      .widenScalarOrEltToNextPow2(1);
+      .widenScalarOrEltToNextPow2(1)
+      .clampNumElements(0, v2s32, v4s32);
 
   getActionDefinitionsBuilder(G_FCMP)
       .legalFor({{s32, s32}, {s32, s64}})
@@ -412,7 +414,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .legalIf(ExtLegalFunc)
       .clampScalar(0, s64, s64); // Just for s128, others are handled above.
 
-  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+  getActionDefinitionsBuilder(G_TRUNC)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+          0, s8)
+      .customIf([=](const LegalityQuery &Query) {
+        LLT DstTy = Query.Types[0];
+        LLT SrcTy = Query.Types[1];
+        return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+      })
+      .alwaysLegal();
 
   getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
 
@@ -709,11 +720,60 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
     return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
   case TargetOpcode::G_GLOBAL_VALUE:
     return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
+  case TargetOpcode::G_TRUNC:
+    return legalizeVectorTrunc(MI, Helper);
   }
 
   llvm_unreachable("expected switch to return");
 }
 
+static void extractParts(Register Reg, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
+                         SmallVectorImpl<Register> &VRegs) {
+  for (int I = 0; I < NumParts; ++I)
+    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+  MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool AArch64LegalizerInfo::legalizeVectorTrunc(
+    MachineInstr &MI, LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // Similar to how operand splitting is done in SelectiondDAG, we can handle
+  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+  //   %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+  //   %lo16(<4 x s16>) = G_TRUNC %inlo
+  //   %hi16(<4 x s16>) = G_TRUNC %inhi
+  //   %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+  //   %res(<8 x s8>) = G_TRUNC %in16
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+  assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
+         isPowerOf2_32(SrcTy.getSizeInBits()));
+
+  // Split input type.
+  LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
+  // First, split the source into two smaller vectors.
+  SmallVector<Register, 2> SplitSrcs;
+  extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
+
+  // Truncate the splits into intermediate narrower elements.
+  LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
+    SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+
+  auto Concat = MIRBuilder.buildConcatVectors(
+      DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
+
+  Helper.Observer.changingInstr(MI);
+  MI.getOperand(1).setReg(Concat.getReg(0));
+  Helper.Observer.changedInstr(MI);
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
     GISelChangeObserver &Observer) const {

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 1cb24559c1ab..8217e37c8512 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
 
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 
 namespace llvm {
@@ -45,6 +46,7 @@ class AArch64LegalizerInfo : public LegalizerInfo {
   bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &MIRBuilder,
                                   GISelChangeObserver &Observer) const;
+  bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
index ce078624a982..ec0446d0236f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
@@ -1920,3 +1920,79 @@ body:             |
     RET_ReallyLR implicit $d0
 
 ...
+---
+name:            icmp_8xs1
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+  - { reg: '$q1' }
+  - { reg: '$q2' }
+  - { reg: '$q3' }
+body:             |
+  bb.1:
+    liveins: $q0, $q1, $q2, $q3
+
+    ; CHECK-LABEL: name: icmp_8xs1
+    ; CHECK: liveins: $q0, $q1, $q2, $q3
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
+    ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %2:_(<4 x s32>) = COPY $q0
+    %3:_(<4 x s32>) = COPY $q1
+    %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
+    %4:_(<4 x s32>) = COPY $q2
+    %5:_(<4 x s32>) = COPY $q3
+    %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
+    %6:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
+    %7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>)
+    $d0 = COPY %7(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name:            icmp_8xs32
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+  - { reg: '$q1' }
+  - { reg: '$q2' }
+  - { reg: '$q3' }
+body:             |
+  bb.1:
+    liveins: $q0, $q1, $q2, $q3
+
+    ; CHECK-LABEL: name: icmp_8xs32
+    ; CHECK: liveins: $q0, $q1, $q2, $q3
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
+    ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %2:_(<4 x s32>) = COPY $q0
+    %3:_(<4 x s32>) = COPY $q1
+    %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
+    %4:_(<4 x s32>) = COPY $q2
+    %5:_(<4 x s32>) = COPY $q3
+    %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
+    %6:_(<8 x s32>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
+    %7:_(<8 x s8>) = G_TRUNC %6(<8 x s32>)
+    $d0 = COPY %7(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...


        


More information about the llvm-commits mailing list