[llvm] [AArch64][GlobalISel] Combine G_CONCAT_VECTORS with Illegal Form (PR #85047)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 13 02:01:41 PDT 2024


llvmbot wrote:



@llvm/pr-subscribers-llvm-globalisel

Author: None (chuongg3)




---
Full diff: https://github.com/llvm/llvm-project/pull/85047.diff


5 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h (+108-1) 
- (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+6) 
- (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+38) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir (+72-15) 
- (modified) llvm/test/CodeGen/AArch64/concat-vector.ll (+35-1) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index da330b517c2801..a4f8c1c5ceb828 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -359,6 +359,108 @@ class LegalizationArtifactCombiner {
     return false;
   }
 
+  // Try to combine an illegal G_CONCAT_VECTORS instruction.
+  // The combine applies if the G_CONCAT_VECTORS instruction is illegal and
+  // every source register is either:
+  //  - defined by a G_BITCAST instruction, or
+  //  - defined by G_IMPLICIT_DEF, looking through casts and merges
+  // ===============
+  //
+  // %0(s32) = G_LOAD %ptr
+  // %1(<4 x s8>) = G_BITCAST %0(s32)
+  // %2(s8) = G_IMPLICIT_DEF
+  // %3(<4 x s8>) = G_BUILD_VECTOR %2(s8), %2(s8), %2(s8), %2(s8)
+  // %4(<16 x s8>) = G_CONCAT_VECTORS %1(<4 x s8>), %3(<4 x s8>), %3(<4 x s8>),
+  // %3(<4 x s8>)
+  //
+  // ====>
+  //
+  // %0(s32) = G_LOAD %ptr
+  // %1(s32) = G_IMPLICIT_DEF
+  // %2(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %1(s32), %1(s32)
+  // %3(<16 x s8>) = G_BITCAST %2(<4 x s32>)
+  bool tryCombineConcat(GConcatVectors &MI,
+                        SmallVectorImpl<MachineInstr *> &DeadInsts,
+                        SmallVectorImpl<Register> &UpdatedDefs,
+                        GISelObserverWrapper &WrapperObserver) {
+    Register SrcReg0 = MI.getSourceReg(0);
+    LLT SrcTy = MRI.getType(SrcReg0);
+    unsigned NumSrc = MI.getNumSources();
+
+    Register DstReg = MI.getReg(0);
+    LLT DstTy = MRI.getType(DstReg);
+
+    // Do not attempt the combine if the instruction is already legal
+    if (isInstLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, SrcTy}}))
+      return false;
+
+    // Check whether the combine applies before building any new instructions
+    SmallVector<Register> SrcRegs;
+    unsigned NumUndef = 0;
+    for (unsigned i = 0; i < NumSrc; i++) {
+      Register CurrReg = MI.getSourceReg(i);
+      LLT CurrTy = MRI.getType(CurrReg);
+
+      // Only handle cases where all sources are the same size
+      if (CurrTy.getSizeInBits() != SrcTy.getSizeInBits())
+        return false;
+
+      // Register defined by G_IMPLICIT_DEF
+      if (isImpDefVRegValWithLookThrough(CurrReg, MRI)) {
+        NumUndef++;
+        SrcRegs.push_back(CurrReg);
+      } else if (MachineInstr *DefMI = getDefIgnoringCopies(CurrReg, MRI);
+                 DefMI->getOpcode() == TargetOpcode::G_BITCAST) {
+        Register DefSrcReg = DefMI->getOperand(1).getReg();
+        LLT DefSrcTy = MRI.getType(DefSrcReg);
+
+        // Only handle cases where the G_BITCAST source type is scalar
+        if (DefSrcTy.isScalar())
+          SrcRegs.push_back(DefSrcReg);
+        else
+          return false;
+      } else {
+        return false;
+      }
+    }
+
+    // Do not combine if all sources are G_IMPLICIT_DEF
+    if (NumUndef == NumSrc)
+      return false;
+
+    // The instruction matches the conditions; perform the combine
+    Builder.setInstrAndDebugLoc(MI);
+    SmallVector<Register> BuildRegs;
+    MachineInstr *UndefMI = nullptr;
+
+    for (Register CurrReg : SrcRegs) {
+      LLT CurrTy = MRI.getType(CurrReg);
+      if (CurrTy == LLT::scalar(SrcTy.getSizeInBits())) {
+        BuildRegs.push_back(CurrReg);
+      } else if (isImpDefVRegValWithLookThrough(CurrReg, MRI)) {
+        if (!UndefMI)
+          UndefMI = Builder.buildUndef(LLT::scalar(SrcTy.getSizeInBits()));
+        BuildRegs.push_back(UndefMI->getOperand(0).getReg());
+      } else {
+        return false;
+      }
+    }
+
+    // Build the vector of scalars
+    LLT BuildTy = LLT::fixed_vector(MI.getNumSources(), SrcTy.getSizeInBits());
+    Register BuildDstReg =
+        Builder.buildBuildVector(BuildTy, BuildRegs).getReg(0);
+
+    // Bitcast the result back to the intended destination type
+    Builder.buildBitcast(DstReg, BuildDstReg);
+
+    DeadInsts.push_back(&MI);
+    return true;
+  }
+
   /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
   bool tryFoldImplicitDef(MachineInstr &MI,
                           SmallVectorImpl<MachineInstr *> &DeadInsts,
@@ -1317,9 +1419,14 @@ class LegalizationArtifactCombiner {
       Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                         UpdatedDefs, WrapperObserver);
       break;
+    case TargetOpcode::G_CONCAT_VECTORS:
+      Changed = tryCombineConcat(cast<GConcatVectors>(MI), DeadInsts,
+                                 UpdatedDefs, WrapperObserver);
+      if (Changed)
+        break;
+      [[fallthrough]];
     case TargetOpcode::G_MERGE_VALUES:
     case TargetOpcode::G_BUILD_VECTOR:
-    case TargetOpcode::G_CONCAT_VECTORS:
       // If any of the users of this merge are an unmerge, then add them to the
       // artifact worklist in case there's folding that can be done looking up.
       for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index f8900f3434ccaa..584b7983f3e08b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -528,6 +528,12 @@ bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
 bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
                     bool IsFP);
 
+/// Returns true if the register was defined by G_IMPLICIT_DEF.
+/// If the register was produced by a merge-like instruction, returns true
+/// only if all of its sources were defined by G_IMPLICIT_DEF.
+bool isImpDefVRegValWithLookThrough(Register VReg,
+                                    const MachineRegisterInfo &MRI);
+
 /// Returns an integer representing true, as defined by the
 /// TargetBooleanContents.
 int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 23ad68b331c977..b853810a738402 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -411,6 +411,44 @@ std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
 
 } // end anonymous namespace
 
+// Checks whether a register was (transitively) defined by G_IMPLICIT_DEF
+bool llvm::isImpDefVRegValWithLookThrough(Register VReg,
+                                          const MachineRegisterInfo &MRI) {
+
+  MachineInstr *MI;
+  while ((MI = MRI.getVRegDef(VReg))) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_BITCAST:
+    case TargetOpcode::G_INTTOPTR:
+      VReg = MI->getOperand(1).getReg();
+      break;
+    case TargetOpcode::COPY:
+      VReg = MI->getOperand(1).getReg();
+      if (VReg.isPhysical())
+        return false;
+      break;
+    case TargetOpcode::G_CONCAT_VECTORS:
+    case TargetOpcode::G_MERGE_VALUES:
+    case TargetOpcode::G_BUILD_VECTOR: {
+      // Check that all sources are G_IMPLICIT_DEF
+      auto *MergeMI = cast<GMergeLikeInstr>(MI);
+      for (unsigned i = 0; i < MergeMI->getNumSources(); i++) {
+        if (!isImpDefVRegValWithLookThrough(MergeMI->getSourceReg(i), MRI))
+          return false;
+      }
+      return true;
+    }
+    case TargetOpcode::G_IMPLICIT_DEF:
+      return true;
+    default:
+      return false;
+    }
+  }
+  return false;
+}
+
 std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
   return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir
index 3263fdcdee6628..10f096ce706c8e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-concat-vectors.mir
@@ -9,11 +9,12 @@ body: |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: legal_v4s32_v2s32
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
-    ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+    ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
     %2:_(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
@@ -28,11 +29,12 @@ body: |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: legal_v8s16_v4s16
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
-    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
-    ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
+    ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(<4 x s16>) = COPY $d0
     %1:_(<4 x s16>) = COPY $d1
     %2:_(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
@@ -47,14 +49,69 @@ body: |
     liveins: $q0
     ; CHECK-LABEL: name: legal_v16s8_v8s8
     ; CHECK: liveins: $q0
-    ; CHECK: %a:_(<8 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: %b:_(<8 x s8>) = G_IMPLICIT_DEF
-    ; CHECK: %concat:_(<16 x s8>) = G_CONCAT_VECTORS %a(<8 x s8>), %b(<8 x s8>)
-    ; CHECK: $q0 = COPY %concat(<16 x s8>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(<8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %b:_(<8 x s8>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: %concat:_(<16 x s8>) = G_CONCAT_VECTORS %a(<8 x s8>), %b(<8 x s8>)
+    ; CHECK-NEXT: $q0 = COPY %concat(<16 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %a:_(<8 x s8>) = G_IMPLICIT_DEF
     %b:_(<8 x s8>) = G_IMPLICIT_DEF
     %concat:_(<16 x s8>) = G_CONCAT_VECTORS %a:_(<8 x s8>), %b:_(<8 x s8>)
     $q0 = COPY %concat(<16 x s8>)
     RET_ReallyLR implicit $q0
 ...
+---
+name:            illegal_v16s8_v4s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: illegal_v16s8_v4s8
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(p0) = COPY $x0
+    ; CHECK-NEXT: %b:_(s32) = G_LOAD %a(p0) :: (load (s32))
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %b(s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: %f:_(<16 x s8>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %f(<16 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %a:_(p0) = COPY $x0
+    %b:_(s32) = G_LOAD %a:_(p0) :: (load (s32))
+    %c:_(<4 x s8>) = G_BITCAST %b:_(s32)
+    %d:_(s8) = G_IMPLICIT_DEF
+    %e:_(<4 x s8>) = G_BUILD_VECTOR %d:_(s8), %d:_(s8), %d:_(s8), %d:_(s8)
+    %f:_(<16 x s8>) = G_CONCAT_VECTORS %c:_(<4 x s8>), %e:_(<4 x s8>), %e:_(<4 x s8>), %e:_(<4 x s8>)
+
+    $q0 = COPY %f(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            illegal_v8s16_v2s16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: illegal_v8s16_v2s16
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(p0) = COPY $x0
+    ; CHECK-NEXT: %b:_(s32) = G_LOAD %a(p0) :: (load (s32))
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %b(s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: %f:_(<8 x s16>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>)
+    ; CHECK-NEXT: $q0 = COPY %f(<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %a:_(p0) = COPY $x0
+    %b:_(s32) = G_LOAD %a:_(p0) :: (load (s32))
+    %c:_(<2 x s16>) = G_BITCAST %b:_(s32)
+    %d:_(s16) = G_IMPLICIT_DEF
+    %e:_(<2 x s16>) = G_BUILD_VECTOR %d:_(s16), %d:_(s16)
+    %f:_(<8 x s16>) = G_CONCAT_VECTORS %c:_(<2 x s16>), %e:_(<2 x s16>), %e:_(<2 x s16>), %e:_(<2 x s16>)
+
+    $q0 = COPY %f(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index bd48c32566fc94..34307e7d5d1a22 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -1,5 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI:         warning: Instruction selection used fallback path for concat1
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for concat2
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for concat4
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for concat9
+; CHECK-GI-NEXT:    warning: Instruction selection used fallback path for concat_v8s16_v2s16
 
 define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
 ; CHECK-LABEL: concat1:
@@ -112,3 +119,30 @@ define <16 x half> @concat11(<8 x half> %A, <8 x half> %B) {
    %v16half= shufflevector <8 x half> %A, <8 x half> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    ret <16 x half> %v16half
 }
+
+define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: concat_v8s16_v2s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    add x9, x0, #2
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ld1 { v0.h }[2], [x9]
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+    %a = load <2 x i16>, ptr %ptr
+    %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+    ret <8 x i16> %b
+}
+
+define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: concat_v16s8_v4s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    ret
+    %a = load <4 x i8>, ptr %ptr
+    %b = shufflevector <4 x i8> %a, <4 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+    ret <16 x i8> %b
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}

``````````
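
For readers unfamiliar with the new utility, here is a minimal usage sketch. It assumes only the `isImpDefVRegValWithLookThrough` helper added by this patch plus the existing `GConcatVectors` accessors; `allSourcesUndef` itself is a hypothetical helper, not part of the patch. It expresses the same "all sources are undef" bail-out that `tryCombineConcat` performs with its `NumUndef == NumSrc` check:

```cpp
// Sketch only: relies on isImpDefVRegValWithLookThrough from this patch and
// on GConcatVectors::getNumSources()/getSourceReg() from GenericMachineInstrs.
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Returns true if every source of the concat is (transitively) undef,
// looking through casts, copies, and merge-like instructions.
static bool allSourcesUndef(GConcatVectors &MI,
                            const MachineRegisterInfo &MRI) {
  for (unsigned I = 0, E = MI.getNumSources(); I != E; ++I)
    if (!isImpDefVRegValWithLookThrough(MI.getSourceReg(I), MRI))
      return false;
  return true;
}
```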



https://github.com/llvm/llvm-project/pull/85047

