[llvm] r353121 - GlobalISel: Combine g_extract with g_merge_values

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 4 15:41:59 PST 2019


Author: arsenm
Date: Mon Feb  4 15:41:59 2019
New Revision: 353121

URL: http://llvm.org/viewvc/llvm-project?rev=353121&view=rev
Log:
GlobalISel: Combine g_extract with g_merge_values

Try to use the underlying source registers.

This enables legalization in more cases where some irregular
operations are widened and others narrowed.

This seems to make the test_combines_2 AArch64 test worse, since the
MERGE_VALUES has multiple uses. Since this should be required for
legalization, a hasOneUse check is probably inappropriate (or maybe
should only be used if the merge is legal?).

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
Modified:
    llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
    llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir

Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h?rev=353121&r1=353120&r2=353121&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h Mon Feb  4 15:41:59 2019
@@ -163,6 +163,16 @@ public:
     return false;
   }
 
+  static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) {
+    if (OpTy.isVector() && DestTy.isVector())
+      return TargetOpcode::G_CONCAT_VECTORS;
+
+    if (OpTy.isVector() && !DestTy.isVector())
+      return TargetOpcode::G_BUILD_VECTOR;
+
+    return TargetOpcode::G_MERGE_VALUES;
+  }
+
   bool tryCombineMerges(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts) {
 
@@ -171,16 +181,10 @@ public:
 
     unsigned NumDefs = MI.getNumOperands() - 1;
 
-    unsigned MergingOpcode;
     LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
     LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
-    if (OpTy.isVector() && DestTy.isVector())
-      MergingOpcode = TargetOpcode::G_CONCAT_VECTORS;
-    else if (OpTy.isVector() && !DestTy.isVector())
-      MergingOpcode = TargetOpcode::G_BUILD_VECTOR;
-    else
-      MergingOpcode = TargetOpcode::G_MERGE_VALUES;
 
+    unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy);
     MachineInstr *MergeI =
         getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI);
 
@@ -249,6 +253,65 @@ public:
     return true;
   }
 
+  static bool isMergeLikeOpcode(unsigned Opc) {
+    switch (Opc) {
+    case TargetOpcode::G_MERGE_VALUES:
+    case TargetOpcode::G_BUILD_VECTOR:
+    case TargetOpcode::G_CONCAT_VECTORS:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  bool tryCombineExtract(MachineInstr &MI,
+                         SmallVectorImpl<MachineInstr *> &DeadInsts) {
+    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
+
+    // Try to use the source registers from a G_MERGE_VALUES
+    //
+    // %2 = G_MERGE_VALUES %0, %1
+    // %3 = G_EXTRACT %2, N
+    // =>
+    //
+    // for N < %2.getSizeInBits() / 2
+    //     %3 = G_EXTRACT %0, N
+    //
+    // for N >= %2.getSizeInBits() / 2
+    //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits())
+
+    unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    MachineInstr *MergeI = MRI.getVRegDef(Src);
+    if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
+      return false;
+
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    LLT SrcTy = MRI.getType(Src);
+
+    // TODO: Do we need to check if the resulting extract is supported?
+    unsigned ExtractDstSize = DstTy.getSizeInBits();
+    unsigned Offset = MI.getOperand(2).getImm();
+    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
+    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
+    unsigned MergeSrcIdx = Offset / MergeSrcSize;
+
+    // Compute the offset of the last bit the extract needs.
+    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
+
+    // Can't handle the case where the extract spans multiple inputs.
+    if (MergeSrcIdx != EndMergeSrcIdx)
+      return false;
+
+    // TODO: We could modify MI in place in most cases.
+    Builder.setInstr(MI);
+    Builder.buildExtract(
+      MI.getOperand(0).getReg(),
+      MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+      Offset - MergeSrcIdx * MergeSrcSize);
+    markInstAndDefDead(MI, *MergeI, DeadInsts);
+    return true;
+  }
+
   /// Try to combine away MI.
   /// Returns true if it combined away the MI.
   /// Adds instructions that are dead as a result of the combine
@@ -266,6 +329,8 @@ public:
       return tryCombineSExt(MI, DeadInsts);
     case TargetOpcode::G_UNMERGE_VALUES:
       return tryCombineMerges(MI, DeadInsts);
+    case TargetOpcode::G_EXTRACT:
+      return tryCombineExtract(MI, DeadInsts);
     case TargetOpcode::G_TRUNC: {
       bool Changed = false;
       for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
@@ -276,6 +341,23 @@ public:
   }
 
 private:
+
+  static unsigned getArtifactSrcReg(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case TargetOpcode::COPY:
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_UNMERGE_VALUES:
+      return MI.getOperand(MI.getNumOperands() - 1).getReg();
+    case TargetOpcode::G_EXTRACT:
+      return MI.getOperand(1).getReg();
+    default:
+      llvm_unreachable("Not a legalization artifact happen");
+    }
+  }
+
   /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
   /// dead due to MI being killed, then mark DefMI as dead too.
   /// Some of the combines (extends(trunc)), try to walk through redundant
@@ -296,8 +378,8 @@ private:
     // and as a result, %3, %2, %1 are dead.
     MachineInstr *PrevMI = &MI;
     while (PrevMI != &DefMI) {
-      unsigned PrevRegSrc =
-          PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg();
+      unsigned PrevRegSrc = getArtifactSrcReg(*PrevMI);
+
       MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
       if (MRI.hasOneUse(PrevRegSrc)) {
         if (TmpDef != &DefMI) {

Modified: llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp?rev=353121&r1=353120&r2=353121&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/Legalizer.cpp Mon Feb  4 15:41:59 2019
@@ -76,6 +76,7 @@ static bool isArtifact(const MachineInst
   case TargetOpcode::G_UNMERGE_VALUES:
   case TargetOpcode::G_CONCAT_VECTORS:
   case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_EXTRACT:
     return true;
   }
 }

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir?rev=353121&r1=353120&r2=353121&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir Mon Feb  4 15:41:59 2019
@@ -22,8 +22,11 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
     ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ADD]](s32)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[MV]](s64), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY]](s32), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s1)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
+    ; CHECK: $x0 = COPY [[COPY1]](s64)
     %0:_(s32) = COPY $w0
 
     %1:_(s32) = G_ADD %0, %0
@@ -45,6 +48,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
     ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]]
+    ; CHECK: $w0 = COPY [[ADD1]](s32)
     %0:_(s32) = COPY $w0
 
     %1:_(s32) = G_ADD %0, %0
@@ -64,6 +68,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
     ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[COPY1]]
+    ; CHECK: $x0 = COPY [[ADD]](s64)
     %0:_(s64) = COPY $x0
 
     %1:_(s128) = G_MERGE_VALUES %0, %0
@@ -82,6 +87,7 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
     ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]]
+    ; CHECK: $w0 = COPY [[ADD1]](s32)
     %0:_(s32) = COPY $w0
 
     %1:_(s32) = G_ADD %0, %0

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir?rev=353121&r1=353120&r2=353121&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir Mon Feb  4 15:41:59 2019
@@ -11,13 +11,15 @@ body: |
     ; value stored is forwarded directly from first load.
 
     ; CHECK-LABEL: name: test_extracts_1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64)
     ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
-    ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8)
     ; CHECK: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(s32) = COPY $w1
@@ -36,16 +38,17 @@ body: |
 
      ; Low extraction takes the whole low register. High extraction is real.
     ; CHECK-LABEL: name: test_extracts_2
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64)
     ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
     ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
-    ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store 8)
-    ; CHECK: G_STORE [[COPY2]](s32), [[COPY]](p0) :: (store 4)
+    ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8)
+    ; CHECK: G_STORE [[EXTRACT]](s32), [[COPY2]](p0) :: (store 4)
     ; CHECK: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(s32) = COPY $w1
@@ -90,9 +93,9 @@ body: |
 
     ; CHECK-LABEL: name: test_extracts_4
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
     ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
-    ; CHECK: $w0 = COPY [[COPY1]](s32)
+    ; CHECK: $w0 = COPY [[EXTRACT]](s32)
     ; CHECK: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir?rev=353121&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir Mon Feb  4 15:41:59 2019
@@ -0,0 +1,470 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
+
+---
+name: extract_s32_merge_s64_s32_s32_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s64) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 0
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_s32_merge_s64_s32_s32_offset32
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s64_s32_s32_offset32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s64) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 32
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_s64_merge_s128_s64_s64_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s64) = G_EXTRACT %2, 0
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: extract_s64_merge_s128_s64_s64_offset64
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C1]](s64)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s64) = G_EXTRACT %2, 64
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: extract_s32_merge_s128_s64_s64_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 0
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_s32_merge_s128_s64_s64_offset32
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset32
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C]](s64), 32
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 32
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_s32_merge_s128_s64_s64_offset64
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 64
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_s32_merge_s128_s64_s64_offset96
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s128_s64_s64_offset96
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 32
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 96
+    $vgpr0 = COPY %3
+...
+
+# Destination size fits, but is skewed from the start of the register.
+---
+name: extract_s16_merge_s128_s64_s64_offset18
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset18
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C]](s64), 18
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s16) = G_EXTRACT %2, 18
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+# Destination size fits, but is skewed from the start of the register.
+---
+name: extract_s16_merge_s128_s64_s64_offset82
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s16_merge_s128_s64_s64_offset82
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[C1]](s64), 18
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s16) = G_EXTRACT %2, 82
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+
+# Can't handle this since it spans two registers
+---
+name: extract_s64_merge_s128_s64_s64_offset32
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_merge_s128_s64_s64_offset32
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C1]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s128), 32
+    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(s128) = G_MERGE_VALUES %0, %1
+    %3:_(s64) = G_EXTRACT %2, 32
+    $vgpr0_vgpr1 = COPY %3
+...
+
+
+# Only the last bit spans to another register
+---
+name: extract_s16_merge_s32_s32_offset1
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s16_merge_s32_s32_offset1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[MV]](s64), 1
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s64) = G_MERGE_VALUES %0, %1
+    %3:_(s32) = G_EXTRACT %2, 1
+    $vgpr0 = COPY %3
+...
+
+
+# Test with some merges with 3 operands
+
+---
+name: extract_s32_merge_s96_s32_s32_s32_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s96) = G_MERGE_VALUES %0, %1, %2
+    %4:_(s32) = G_EXTRACT %3, 0
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_s32_merge_s96_s32_s32_s32_offset64
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_merge_s96_s32_s32_s32_offset64
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY]](s32)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s96) = G_MERGE_VALUES %0, %1, %2
+    %4:_(s32) = G_EXTRACT %3, 64
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_s64_merge_s96_s32_s32_s32_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 0
+    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s96) = G_MERGE_VALUES %0, %1, %2
+    %4:_(s64) = G_EXTRACT %3, 0
+    $vgpr0_vgpr1 = COPY %4
+...
+
+---
+name: extract_s64_merge_s96_s32_s32_s32_offset32
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_merge_s96_s32_s32_s32_offset32
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[C]](s32), [[C1]](s32), [[C2]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s96), 32
+    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    %0:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s96) = G_MERGE_VALUES %0, %1, %2
+    %4:_(s64) = G_EXTRACT %3, 32
+    $vgpr0_vgpr1 = COPY %4
+...
+
+# Test build_vector sources
+---
+name: extract_s64_build_vector_v2s64_s64_s64_offset0
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
+    %3:_(s64) = G_EXTRACT %2, 0
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: extract_s64_build_vector_v2s64_s64_s64_offset64
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C1]](s64)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
+    %3:_(s64) = G_EXTRACT %2, 64
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: extract_s64_build_vector_v2s64_s64_s64_offset32
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s64_build_vector_v2s64_s64_s64_offset32
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32
+    ; CHECK: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
+    %3:_(s64) = G_EXTRACT %2, 32
+    $vgpr0_vgpr1 = COPY %3
+...
+
+# Test extracting something smaller than the element size
+---
+name: extract_s32_build_vector_v2s64_s64_s64_offset64
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s32_build_vector_v2s64_s64_s64_offset64
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[C1]](s64), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(s64) = G_CONSTANT i64 0
+    %1:_(s64) = G_CONSTANT i64 1
+    %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
+    %3:_(s32) = G_EXTRACT %2, 64
+    $vgpr0 = COPY %3
+
+...
+
+# Test concat_vector sources
+---
+name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+    %3:_(<2 x s16>) = G_EXTRACT %2, 0
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: extract_v2s16_build_vector_v2s64_v2s16_v2s16_offset32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+    %3:_(<2 x s16>) = G_EXTRACT %2, 32
+    $vgpr0 = COPY %3
+...
+
+# Test extracting only a single element, not a subvector
+---
+name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+    %3:_(s16) = G_EXTRACT %2, 32
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: extract_s16_build_vector_v2s64_v2s16_v2s16_offset48
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY1]](<2 x s16>), 16
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+    %3:_(s16) = G_EXTRACT %2, 48
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+# Test extracting less than an element
+---
+name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: extract_s8_build_vector_v2s64_v2s16_v2s16_offset48
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[COPY1]](<2 x s16>), 16
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+    %3:_(s8) = G_EXTRACT %2, 48
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...




More information about the llvm-commits mailing list