[llvm] d3a56f0 - [AArch64][GlobalISel] Allow G_DUP for elements smaller than 32 bits.

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 12 09:41:08 PDT 2020


Author: Jessica Paquette
Date: 2020-06-12T09:40:34-07:00
New Revision: d3a56f062b40ac6246b84ae179a485933ff39db2

URL: https://github.com/llvm/llvm-project/commit/d3a56f062b40ac6246b84ae179a485933ff39db2
DIFF: https://github.com/llvm/llvm-project/commit/d3a56f062b40ac6246b84ae179a485933ff39db2.diff

LOG: [AArch64][GlobalISel] Allow G_DUP for elements smaller than 32 bits.

We select all of these via patterns now, so there's no reason to disallow this.

Update select-dup.mir to show that we correctly select the smaller types.

Differential Revision: https://reviews.llvm.org/D81322

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 1ce69a8900eb..a2cad2fad4c1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -266,11 +266,6 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
     return false;
 
   Register Dst = MI.getOperand(0).getReg();
-  if (MRI.getType(Dst).getScalarSizeInBits() < 32) {
-    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
-    return false;
-  }
-
   MatchInfo =
       ShuffleVectorPseudo(AArch64::G_DUP, Dst, {InsMI->getOperand(2).getReg()});
   return true;

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
index d01bfdff3175..2ff208b0f6f0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
@@ -292,3 +292,51 @@ body:             |
     %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
+
+...
+---
+name:            splat_4xi16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $h0
+    ; CHECK-LABEL: name: splat_4xi16
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr(s16) = COPY $h0
+    ; CHECK: %splat:fpr(<4 x s16>) = G_DUP %copy(s16)
+    ; CHECK: $d0 = COPY %splat(<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:fpr(s16) = COPY $h0
+    %undef:fpr(<4 x s16>) = G_IMPLICIT_DEF
+    %cst:gpr(s32) = G_CONSTANT i32 0
+    %ins:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32)
+    %splat:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)
+    $d0 = COPY %splat(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            splat_8xi8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: splat_8xi8
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr(s32) = COPY $w0
+    ; CHECK: %splat:fpr(<8 x s8>) = G_DUP %copy(s32)
+    ; CHECK: $d0 = COPY %splat(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %undef:fpr(<8 x s8>) = G_IMPLICIT_DEF
+    %cst:gpr(s32) = G_CONSTANT i32 0
+    %ins:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32)
+    %splat:fpr(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    $d0 = COPY %splat(<8 x s8>)
+    RET_ReallyLR implicit $d0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index c99431e33c87..1848e338b7aa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -1,31 +1,31 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# GPR variants should not use INSERT_SUBREG. FPR variants (DUP<ty>lane) should.
 
 ...
 ---
-name:            splat_4xi32
-alignment:       4
+name:            DUPv4i32gpr
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $w0
-
-    ; CHECK-LABEL: name: splat_4xi32
+    ; CHECK-LABEL: name: DUPv4i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[DUPv4i32gpr:%[0-9]+]]:fpr128 = DUPv4i32gpr [[COPY]]
-    ; CHECK: $q0 = COPY [[DUPv4i32gpr]]
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv4i32gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s32) = COPY $w0
-    %4:fpr(<4 x s32>) = G_DUP %0(s32)
-    $q0 = COPY %4(<4 x s32>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<4 x s32>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name:            splat_2xi64
+name:            DUPv2i64gpr
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -33,21 +33,20 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $x0
-
-    ; CHECK-LABEL: name: splat_2xi64
+    ; CHECK-LABEL: name: DUPv2i64gpr
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY]]
-    ; CHECK: $q0 = COPY [[DUPv2i64gpr]]
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %dup:fpr128 = DUPv2i64gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s64) = COPY $x0
-    %4:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %4(<2 x s64>)
+    %copy:gpr(s64) = COPY $x0
+    %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
+    $q0 = COPY %dup(<2 x s64>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name:            splat_2xi32
+name:            DUPv2i32gpr
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -55,21 +54,20 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $w0
-
-    ; CHECK-LABEL: name: splat_2xi32
+    ; CHECK-LABEL: name: DUPv2i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[DUPv2i32gpr:%[0-9]+]]:fpr64 = DUPv2i32gpr [[COPY]]
-    ; CHECK: $d0 = COPY [[DUPv2i32gpr]]
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv2i32gpr %copy
+    ; CHECK: $d0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:gpr(s32) = COPY $w0
-    %4:fpr(<2 x s32>) = G_DUP %0(s32)
-    $d0 = COPY %4(<2 x s32>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<2 x s32>)
     RET_ReallyLR implicit $d0
 
 ...
 ---
-name:            splat_4xf32
+name:            DUPv4i32lane
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -78,22 +76,22 @@ body:             |
   bb.0.entry:
     liveins: $s0
 
-    ; CHECK-LABEL: name: splat_4xf32
+    ; CHECK-LABEL: name: DUPv4i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: %copy:fpr32 = COPY $s0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
-    ; CHECK: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv4i32lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK: %dup:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s32) = COPY $s0
-    %4:fpr(<4 x s32>) = G_DUP %0(s32)
-    $q0 = COPY %4(<4 x s32>)
+    %copy:fpr(s32) = COPY $s0
+    %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<4 x s32>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name:            splat_2xf64
+name:            DUPv2i64lane
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -101,23 +99,22 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $d0
-
-    ; CHECK-LABEL: name: splat_2xf64
+    ; CHECK-LABEL: name: DUPv2i64lane
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: %copy:fpr64 = COPY $d0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
-    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.dsub
+    ; CHECK: %dup:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %4:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %4(<2 x s64>)
+    %copy:fpr(s64) = COPY $d0
+    %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
+    $q0 = COPY %dup(<2 x s64>)
     RET_ReallyLR implicit $q0
 
 ...
 ---
-name:            splat_2xf32
+name:            DUPv2i32lane
 alignment:       4
 legalized:       true
 regBankSelected: true
@@ -125,40 +122,145 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $s0
-
-    ; CHECK-LABEL: name: splat_2xf32
+    ; CHECK-LABEL: name: DUPv2i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: %copy:fpr32 = COPY $s0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
-    ; CHECK: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $d0 = COPY [[DUPv2i32lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK: %dup:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+    ; CHECK: $d0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s32) = COPY $s0
-    %4:fpr(<2 x s32>) = G_DUP %0(s32)
-    $d0 = COPY %4(<2 x s32>)
+    %copy:fpr(s32) = COPY $s0
+    %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<2 x s32>)
     RET_ReallyLR implicit $d0
 
+
 ...
 ---
-name:            splat_2xf64_copies
+name:            DUPv4i16lane
 alignment:       4
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
 body:             |
   bb.0.entry:
-    liveins: $d0
+    liveins: $h0
+    ; CHECK-LABEL: name: DUPv4i16lane
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr16 = COPY $h0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK: %dup:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:fpr(s16) = COPY $h0
+    %dup:fpr(<4 x s16>) = G_DUP %copy(s16)
+    $d0 = COPY %dup(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name:            DUPv4i16gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv4i16gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<4 x s16>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<4 x s16>)
+    RET_ReallyLR implicit $d0
 
-    ; CHECK-LABEL: name: splat_2xf64_copies
-    ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+...
+---
+name:            DUPv8i16lane
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $h0
+    ; CHECK-LABEL: name: DUPv8i16lane
+    ; CHECK: liveins: $h0
+    ; CHECK: %copy:fpr16 = COPY $h0
     ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
-    ; CHECK: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY [[DUPv2i64lane]]
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK: %dup:fpr128 = DUPv8i16lane [[INSERT_SUBREG]], 0
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:fpr(s16) = COPY $h0
+    %dup:fpr(<8 x s16>) = G_DUP %copy(s16)
+    $q0 = COPY %dup(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            DUPv8i16gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv8i16gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
+    ; CHECK: $q0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $q0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<8 x s16>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            DUPv8i8gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv8i8gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
+    ; CHECK: $d0 = COPY %dup
+    ; CHECK: RET_ReallyLR implicit $d0
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<8 x s8>) = G_DUP %copy(s32)
+    $d0 = COPY %dup(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            DUPv16i8gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: DUPv16i8gpr
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
+    ; CHECK: $q0 = COPY %dup
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %6:fpr(<2 x s64>) = G_DUP %0(s64)
-    $q0 = COPY %6(<2 x s64>)
+    %copy:gpr(s32) = COPY $w0
+    %dup:fpr(<16 x s8>) = G_DUP %copy(s32)
+    $q0 = COPY %dup(<16 x s8>)
     RET_ReallyLR implicit $q0


        


More information about the llvm-commits mailing list