[llvm] r371416 - AMDGPU/GlobalISel: Fix RegBankSelect for unaligned, uniform constant loads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 09:06:37 PDT 2019
Author: arsenm
Date: Mon Sep 9 09:06:37 2019
New Revision: 371416
URL: http://llvm.org/viewvc/llvm-project?rev=371416&view=rev
Log:
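AMDGPU/GlobalISel: Fix RegBankSelect for unaligned, uniform constant loads
A uniform load is now mapped to the SGPR bank only when its memory operand is at least 4 bytes in size and at least 4-byte aligned; uniform constant-address loads with align 1 or align 2 are mapped to the VGPR bank instead.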
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=371416&r1=371415&r2=371416&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Mon Sep 9 09:06:37 2019
@@ -320,12 +320,13 @@ AMDGPURegisterBankInfo::getInstrAlternat
}
}
-static bool isInstrUniformNonExtLoad(const MachineInstr &MI) {
+static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
if (!MI.hasOneMemOperand())
return false;
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return MMO->getSize() >= 4 && AMDGPUInstrInfo::isUniformMMO(MMO);
+ return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
+ AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
@@ -426,7 +427,7 @@ AMDGPURegisterBankInfo::getInstrAlternat
unsigned PtrSize = PtrTy.getSizeInBits();
unsigned AS = PtrTy.getAddressSpace();
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
- if (isInstrUniformNonExtLoad(MI) &&
+ if (isInstrUniformNonExtLoadAlign4(MI) &&
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
const InstructionMapping &SSMapping = getInstructionMapping(
1, 1, getOperandsMapping(
@@ -1482,7 +1483,7 @@ AMDGPURegisterBankInfo::getInstrMappingF
const ValueMapping *ValMapping;
const ValueMapping *PtrMapping;
- if (isInstrUniformNonExtLoad(MI) &&
+ if (isInstrUniformNonExtLoadAlign4(MI) &&
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir?rev=371416&r1=371415&r2=371416&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir Mon Sep 9 09:06:37 2019
@@ -1,5 +1,5 @@
-# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -march=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
--- |
define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
@@ -65,6 +65,9 @@
define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }
+ define amdgpu_kernel void @load_constant_i32_uniform_align4() {ret void}
+ define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void}
+ define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void}
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
@@ -586,3 +589,49 @@ body: |
%0:_(p4) = COPY $sgpr0_sgpr1
%1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
...
+
+---
+name: load_constant_i32_uniform_align4
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ ; CHECK-LABEL: name: load_constant_i32_uniform_align4
+ ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK: %1:sgpr(s32) = G_LOAD %0(p4) :: (load 4, addrspace 4)
+ %0:_(p4) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4)
+...
+
+---
+name: load_constant_i32_uniform_align2
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ ; CHECK-LABEL: name: load_constant_i32_uniform_align2
+ ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+ ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 2, addrspace 4)
+
+ %0:_(p4) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2)
+...
+
+---
+name: load_constant_i32_uniform_align1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: load_constant_i32_uniform_align1
+ ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+ ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 1, addrspace 4)
+ %0:_(p4) = COPY $sgpr0_sgpr1
+ %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1)
+...
More information about the llvm-commits mailing list