[llvm] bb71079 - [AMDGPU][GISel] Add missing V2S16 BUILD_VECTOR_TRUNC legalization
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 5 23:49:02 PDT 2022
Author: Pierre van Houtryve
Date: 2022-10-06T06:48:53Z
New Revision: bb71079e3064d32b93328fd76964d88ca569deec
URL: https://github.com/llvm/llvm-project/commit/bb71079e3064d32b93328fd76964d88ca569deec
DIFF: https://github.com/llvm/llvm-project/commit/bb71079e3064d32b93328fd76964d88ca569deec.diff
LOG: [AMDGPU][GISel] Add missing V2S16 BUILD_VECTOR_TRUNC legalization
Previously, V2S16 BUILD_VECTOR_TRUNC could not be legalized on GFX8 and below because the custom legalization was missing.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D135149
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 124cf7140c06..dfa3c0af6526 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1791,6 +1791,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_FFLOOR:
return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_BUILD_VECTOR_TRUNC:
return legalizeBuildVector(MI, MRI, B);
case TargetOpcode::G_MUL:
return legalizeMul(Helper, MI);
@@ -2859,11 +2860,17 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
Register Dst = MI.getOperand(0).getReg();
const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
assert(MRI.getType(Dst) == LLT::fixed_vector(2, 16));
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
- assert(MRI.getType(Src0) == LLT::scalar(16));
+
+ if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC) {
+ assert(MRI.getType(Src0) == S32);
+ Src0 = B.buildTrunc(S16, MI.getOperand(1).getReg()).getReg(0);
+ Src1 = B.buildTrunc(S16, MI.getOperand(2).getReg()).getReg(0);
+ }
auto Merge = B.buildMerge(S32, {Src0, Src1});
B.buildBitcast(Dst, Merge);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
index abb196cdaebf..fc2dadcc7048 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
@@ -1,7 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=PREGFX8 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=PREGFX8 %s
---
name: legal_s32_to_v2s16
@@ -9,13 +11,26 @@ body: |
bb.0:
liveins: $vgpr0, $vgpr1
- ; GFX9-LABEL: name: legal_s32_to_v2s16
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; GFX9PLUS-LABEL: name: legal_s32_to_v2s16
+ ; GFX9PLUS: liveins: $vgpr0, $vgpr1
+ ; GFX9PLUS-NEXT: {{ $}}
+ ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; PREGFX8-LABEL: name: legal_s32_to_v2s16
+ ; PREGFX8: liveins: $vgpr0, $vgpr1
+ ; PREGFX8-NEXT: {{ $}}
+ ; PREGFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; PREGFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; PREGFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; PREGFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; PREGFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+ ; PREGFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PREGFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; PREGFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; PREGFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PREGFX8-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
More information about the llvm-commits mailing list