[llvm] 8550859 - GlobalISel: Extend shift narrowing to G_SHL
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 09:13:55 PST 2020
Author: Matt Arsenault
Date: 2020-02-17T09:13:37-08:00
New Revision: 85508595350e2de0218c15f1c0088cd9f6236894
URL: https://github.com/llvm/llvm-project/commit/85508595350e2de0218c15f1c0088cd9f6236894
DIFF: https://github.com/llvm/llvm-project/commit/85508595350e2de0218c15f1c0088cd9f6236894.diff
LOG: GlobalISel: Extend shift narrowing to G_SHL
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
Modified:
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
Removed:
################################################################################
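For illustration, the G_SHL case of this combine rewrites a wide left shift by a constant of at least half the type width into an unmerge, a narrow shift, and a merge. The snippet below is a plain-MIR rendering of the narrow_shl_s64_33 test added in this patch; the register names are illustrative only.

  ; Before: 64-bit left shift by a constant >= 32.
  %x:_(s64) = COPY $vgpr0_vgpr1
  %amt:_(s32) = G_CONSTANT i32 33
  %r:_(s64) = G_SHL %x, %amt(s32)

  ; After: the low half of %x is shifted by 33 - 32 = 1 and becomes the
  ; high half of the result; the low half of the result is zero.
  %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x(s64)
  %c1:_(s32) = G_CONSTANT i32 1
  %shl:_(s32) = G_SHL %lo, %c1(s32)
  %zero:_(s32) = G_CONSTANT i32 0
  %r:_(s64) = G_MERGE_VALUES %zero(s32), %shl(s32)

Shift amounts below half the width or at or above the full width, 32-bit scalars, and vector shifts are left untouched, as the remaining tests in the new MIR file show.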
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 855c0136aab3..42b769f9dc9b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1376,7 +1376,8 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
- assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a shift");
+ assert((MI.getOpcode() == TargetOpcode::G_SHL ||
+ MI.getOpcode() == TargetOpcode::G_LSHR) && "Expected a shift");
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
if (Ty.isVector()) // TODO:
@@ -1396,32 +1397,55 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
-// dst = G_LSHR s64:x, C for C >= 32
-// =>
-// lo, hi = G_UNMERGE_VALUES x
-// dst = merge_values (G_LSHR hi, C - 32), 0
bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
const unsigned &ShiftVal) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(SrcReg);
unsigned Size = Ty.getSizeInBits();
+ unsigned HalfSize = Size / 2;
- assert(ShiftVal >= Size / 2);
- LLT HalfTy = LLT::scalar(Size / 2);
+ assert(ShiftVal >= HalfSize);
+ LLT HalfTy = LLT::scalar(HalfSize);
Builder.setInstr(MI);
auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
+ unsigned NarrowShiftAmt = ShiftVal - HalfSize;
- Register Narrowed = Unmerge.getReg(1);
- if (ShiftVal > Size / 2) {
- Narrowed = Builder.buildLShr(
- HalfTy, Narrowed,
- Builder.buildConstant(HalfTy, ShiftVal - Size / 2)).getReg(0);
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ Register Narrowed = Unmerge.getReg(1);
+
+ // dst = G_LSHR s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
+
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildLShr(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Narrowed, Zero });
+ } else {
+ Register Narrowed = Unmerge.getReg(0);
+ // dst = G_SHL s64:x, C for C >= 32
+ // =>
+ // lo, hi = G_UNMERGE_VALUES x
+ // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
+
+ // TODO: ashr
+ assert(MI.getOpcode() == TargetOpcode::G_SHL);
+
+ if (NarrowShiftAmt != 0) {
+ Narrowed = Builder.buildShl(HalfTy, Narrowed,
+ Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
+ }
+
+ auto Zero = Builder.buildConstant(HalfTy, 0);
+ Builder.buildMerge(DstReg, { Zero, Narrowed });
}
- Builder.buildMerge(
- DstReg, { Narrowed, Builder.buildConstant(HalfTy, 0).getReg(0) });
MI.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 533cd6780067..2d14c1fc9f9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -165,6 +165,7 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return true;
switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
// On some subtargets, 64-bit shift is a quarter rate instruction. In the
// common case, splitting this into a move and a 32-bit shift is faster and
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
new file mode 100644
index 000000000000..313303e155bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-narrow.mir
@@ -0,0 +1,203 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+
+---
+name: narrow_shl_s64_32_s64amt
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_32_s64amt
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = G_CONSTANT i64 32
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_32
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 32
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_33
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_33
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 33
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_31
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_31
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 31
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_63
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_63
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 63
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_64
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 64
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s64_65
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_s64_65
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
+ ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[SHL]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 65
+ %2:_(s64) = G_SHL %0, %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: narrow_shl_s32_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: narrow_shl_s32_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: $vgpr0 = COPY [[SHL]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 16
+ %2:_(s32) = G_SHL %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: narrow_shl_s32_17
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: narrow_shl_s32_17
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+ ; CHECK: $vgpr0 = COPY [[SHL]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = G_CONSTANT i32 17
+ %2:_(s32) = G_SHL %0, %1
+ $vgpr0 = COPY %2
+...
+
+---
+name: narrow_shl_v2s32_17
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: narrow_shl_v2s32_17
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[SHL]](<2 x s32>)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_CONSTANT i32 17
+ %2:_(<2 x s32>) = G_BUILD_VECTOR %1, %1
+ %3:_(<2 x s32>) = G_SHL %0, %2
+ $vgpr0_vgpr1 = COPY %3
+...