[llvm] r364212 - AMDGPU/GlobalISel: Split VALU s64 G_ZEXT/G_SEXT in RegBankSelect

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 24 10:54:12 PDT 2019


Author: arsenm
Date: Mon Jun 24 10:54:12 2019
New Revision: 364212

URL: http://llvm.org/viewvc/llvm-project?rev=364212&view=rev
Log:
AMDGPU/GlobalISel: Split VALU s64 G_ZEXT/G_SEXT in RegBankSelect

Scalar extends to s64 can use S_BFE_{I64|U64}, but vector extends need
to extend to the 32-bit half, and then to 64.

I'm not sure what the line should be between what RegBankSelect
handles, and what instruction select does, but for now I'm erring on
the side of RegBankSelect for future post-RBS combines.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=364212&r1=364211&r2=364212&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Jun 24 10:54:12 2019
@@ -830,19 +830,53 @@ void AMDGPURegisterBankInfo::applyMappin
   case AMDGPU::G_ZEXT: {
     Register SrcReg = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
-    if (SrcTy != LLT::scalar(1))
-      return;
+    bool Signed = Opc == AMDGPU::G_SEXT;
 
     MachineIRBuilder B(MI);
-    bool Signed = Opc == AMDGPU::G_SEXT;
+    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
     Register DstReg = MI.getOperand(0).getReg();
     LLT DstTy = MRI.getType(DstReg);
-    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
-    if (SrcBank->getID() == AMDGPU::SCCRegBankID ||
-        SrcBank->getID() == AMDGPU::VCCRegBankID) {
-      const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
-      unsigned DstSize = DstTy.getSizeInBits();
+    if (DstTy.isScalar() &&
+        SrcBank != &AMDGPU::SGPRRegBank &&
+        SrcBank != &AMDGPU::SCCRegBank &&
+        SrcBank != &AMDGPU::VCCRegBank &&
+        // FIXME: Should handle any type that round to s64 when irregular
+        // breakdowns supported.
+        DstTy.getSizeInBits() == 64 &&
+        SrcTy.getSizeInBits() <= 32) {
+      const LLT S32 = LLT::scalar(32);
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+      // Extend to 32-bit, and then extend the low half.
+      if (Signed) {
+        // TODO: Should really be buildSExtOrCopy
+        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+        // Replicate sign bit from 32-bit extended part.
+        auto ShiftAmt = B.buildConstant(S32, 31);
+        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+      } else {
+        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+        B.buildConstant(DefRegs[1], 0);
+      }
+
+      MRI.setRegBank(DstReg, *SrcBank);
+      MI.eraseFromParent();
+      return;
+    }
+
+    if (SrcTy != LLT::scalar(1))
+      return;
+
+    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 
+      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+      unsigned DstSize = DstTy.getSizeInBits();
       // 64-bit select is SGPR only
       const bool UseSel64 = DstSize > 32 &&
         SrcBank->getID() == AMDGPU::SCCRegBankID;
@@ -854,10 +888,11 @@ void AMDGPURegisterBankInfo::applyMappin
 
       MRI.setRegBank(True.getReg(0), *DstBank);
       MRI.setRegBank(False.getReg(0), *DstBank);
+      MRI.setRegBank(DstReg, *DstBank);
+
       if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
-        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
-        MRI.setRegBank(Sel.getReg(0), *DstBank);
-        B.buildMerge(DstReg, { Sel.getReg(0), Sel.getReg(0) });
+        B.buildSelect(DefRegs[0], SrcReg, True, False);
+        B.buildCopy(DefRegs[1], DefRegs[0]);
       } else if (DstSize < 32) {
         auto Sel = B.buildSelect(SelType, SrcReg, True, False);
         MRI.setRegBank(Sel.getReg(0), *DstBank);
@@ -1313,8 +1348,17 @@ AMDGPURegisterBankInfo::getInstrMapping(
       break;
     }
 
-    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
-    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    // TODO: Should anyext be split into 32-bit part as well?
+    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+    } else {
+      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+      // 32-bits, and then to 64.
+      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+                                                         SrcSize);
+    }
     break;
   }
   case AMDGPU::G_FCMP: {

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir?rev=364212&r1=364211&r2=364212&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir Mon Jun 24 10:54:12 2019
@@ -17,6 +17,22 @@ body: |
 ...
 
 ---
+name: sext_s16_to_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: sext_s16_to_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_SEXT %1
+...
+
+---
 name: sext_s32_to_s64_v
 legalized: true
 
@@ -25,7 +41,10 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: sext_s32_to_s64_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s64) = G_SEXT [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_SEXT %0
 ...
@@ -146,7 +165,8 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
     ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +278,30 @@ body: |
     ; CHECK-LABEL: name: sext_s1_to_s64_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_SEXT %1
 ...
+
+---
+name: sext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: sext_s16_to_s64_vgpr
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_SEXT %1
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir?rev=364212&r1=364211&r2=364212&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir Mon Jun 24 10:54:12 2019
@@ -17,6 +17,22 @@ body: |
 ...
 
 ---
+name: zext_s16_to_s64_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: zext_s16_to_s64_s
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_ZEXT %1
+...
+
+---
 name: zext_s32_to_s64_v
 legalized: true
 
@@ -25,7 +41,9 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: zext_s32_to_s64_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = G_ZEXT %0
 ...
@@ -146,7 +164,8 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
     ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
-    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +277,28 @@ body: |
     ; CHECK-LABEL: name: zext_s1_to_s64_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
-    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
-    ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
-    ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1) = G_TRUNC %0
     %2:_(s64) = G_ZEXT %1
 ...
+
+---
+name: zext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: zext_s16_to_s64_vgpr
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s64) = G_ZEXT %1
+...




More information about the llvm-commits mailing list