[llvm] 792667d - [GlobalISel] Check if ShiftAmt is greater than size of operand
Mirko Brkusanin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 8 08:40:06 PDT 2023
Author: Mirko Brkusanin
Date: 2023-06-08T17:37:59+02:00
New Revision: 792667daddecd90c5ce2e2936dc9116b811c3bb0
URL: https://github.com/llvm/llvm-project/commit/792667daddecd90c5ce2e2936dc9116b811c3bb0
DIFF: https://github.com/llvm/llvm-project/commit/792667daddecd90c5ce2e2936dc9116b811c3bb0.diff
LOG: [GlobalISel] Check if ShiftAmt is greater than size of operand
matchCombineShlOfExtend did not check whether the shift amount of the new,
narrower shift would reach or exceed the size of its operand. The existing
condition did not catch this when the value being shifted was zero. Also
updated to support vector splat shift amounts.
Patch by: Acim Maravic
Differential Revision: https://reviews.llvm.org/D151122
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d1ebfb8b3df48..d958c9c1dda04 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1722,9 +1722,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
!mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
return false;
- // TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -1739,12 +1739,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return false;
}
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
MatchData.Reg = ExtSrc;
MatchData.Imm = ShiftAmt;
unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
- return MinLeadingZeros >= ShiftAmt;
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
index f5a712515d9cb..6d9d695ba6890 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
@@ -17,18 +17,20 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C2]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C3]]
; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]]
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[UDIV]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]]
; CHECK-NEXT: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64))
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
index f46060cc40b5a..2851e8133427a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir
@@ -327,3 +327,105 @@ body: |
%shl:_(s64) = G_SHL %extend, %shiftamt
$vgpr0_vgpr1 = COPY %shl
...
+
+---
+name: do_not_shl_s32_zero_by_16_from_zext_s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ %zero:_(s16) = G_CONSTANT i16 0
+ %extend:_(s32) = G_ZEXT %zero:_(s16)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ $vgpr0 = COPY %shl
+...
+
+---
+name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
+ ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
+ ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
+ ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
+ ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ %zero:_(s16) = G_CONSTANT i16 0
+ %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16)
+ %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>)
+ %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector
+ $vgpr0_vgpr1 = COPY %shl
+...
+
+---
+name: do_not_shl_s32_by_16_from_zext_s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0
+ ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
+ ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
+ ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ %argument:_(s32) = COPY $vgpr0
+ %truncate:_(s16) = G_TRUNC %argument:_(s32)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %extend:_(s32) = G_ZEXT %truncate:_(s16)
+ %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ $vgpr0 = COPY %shl
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
index 8948773439298..317c1f29e43f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir
@@ -134,10 +134,10 @@ body: |
; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383
; GFX6-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16)
; GFX6-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec
- ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %masked(<2 x s16>)
- ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
- ; GFX6-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32)
- ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvec(<2 x s32>)
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX6-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
; GFX9-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16
; GFX9: liveins: $vgpr0
@@ -146,10 +146,10 @@ body: |
; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383
; GFX9-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16)
; GFX9-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec
- ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %masked(<2 x s16>)
- ; GFX9-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
- ; GFX9-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32)
- ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvec(<2 x s32>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
%narrow:_(<2 x s16>) = COPY $vgpr0
%masklow14:_(s16) = G_CONSTANT i16 16383
@@ -176,10 +176,10 @@ body: |
; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823
; GFX6-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32)
; GFX6-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec
- ; GFX6-NEXT: %extend:_(<2 x s64>) = G_ANYEXT %masked(<2 x s32>)
; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
; GFX6-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32)
- ; GFX6-NEXT: %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec(<2 x s32>)
+ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>)
+ ; GFX6-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>)
; GFX9-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32
; GFX9: liveins: $vgpr0_vgpr1
@@ -188,10 +188,10 @@ body: |
; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823
; GFX9-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32)
; GFX9-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec
- ; GFX9-NEXT: %extend:_(<2 x s64>) = G_ANYEXT %masked(<2 x s32>)
; GFX9-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2
; GFX9-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32)
- ; GFX9-NEXT: %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec(<2 x s32>)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>)
+ ; GFX9-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>)
%narrow:_(<2 x s32>) = COPY $vgpr0_vgpr1
%masklow30:_(s32) = G_CONSTANT i32 1073741823
@@ -203,3 +203,105 @@ body: |
%shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl
...
+
+---
+name: do_not_shl_s32_zero_by_16_from_zext_s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ %zero:_(s16) = G_CONSTANT i16 0
+ %extend:_(s32) = G_ZEXT %zero:_(s16)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ $vgpr0 = COPY %shl
+...
+
+---
+name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
+ ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
+ ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
+ ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
+ ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
+ ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
+ %zero:_(s16) = G_CONSTANT i16 0
+ %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16)
+ %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>)
+ %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector
+ $vgpr0_vgpr1 = COPY %shl
+...
+
+---
+name: do_not_shl_s32_by_16_from_zext_s16
+tracksRegLiveness: true
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0
+ ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
+ ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
+ ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX6-NEXT: $vgpr0 = COPY %shl(s32)
+ ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32)
+ ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
+ ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16)
+ ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ ; GFX9-NEXT: $vgpr0 = COPY %shl(s32)
+ %argument:_(s32) = COPY $vgpr0
+ %truncate:_(s16) = G_TRUNC %argument:_(s32)
+ %shiftamt:_(s16) = G_CONSTANT i16 16
+ %extend:_(s32) = G_ZEXT %truncate:_(s16)
+ %shl:_(s32) = G_SHL %extend, %shiftamt(s16)
+ $vgpr0 = COPY %shl
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
index fdf7a6ad9f824..01bff586fd763 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -434,12 +434,10 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_zext_v2i32(<2 x i32> inreg %x) {
; GCN-NEXT: s_brev_b32 s2, -4
; GCN-NEXT: s_mov_b32 s3, s2
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: s_lshl_b32 s2, s1, 2
+; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_mov_b32 s3, 0
-; GCN-NEXT: s_mov_b32 s2, s0
-; GCN-NEXT: s_mov_b32 s4, s1
-; GCN-NEXT: s_mov_b32 s5, s3
-; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
-; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 2
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i64_zext_v2i32:
@@ -448,11 +446,9 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_zext_v2i32(<2 x i32> inreg %x) {
; GFX10PLUS-NEXT: s_mov_b32 s3, s2
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
-; GFX10PLUS-NEXT: s_mov_b32 s2, s0
-; GFX10PLUS-NEXT: s_mov_b32 s4, s1
-; GFX10PLUS-NEXT: s_mov_b32 s5, s3
-; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
-; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10PLUS-NEXT: s_lshl_b32 s2, s1, 2
+; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%and = and <2 x i32> %x, <i32 1073741823, i32 1073741823>
%ext = zext <2 x i32> %and to <2 x i64>
@@ -461,59 +457,37 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_zext_v2i32(<2 x i32> inreg %x) {
}
define <2 x i64> @v_shl_v2i64_zext_v2i32(<2 x i32> %x) {
-; GFX7-LABEL: v_shl_v2i64_zext_v2i32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1
-; GFX7-NEXT: v_mov_b32_e32 v1, 0
-; GFX7-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
-; GFX7-NEXT: v_mov_b32_e32 v3, v1
-; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX7-NEXT: v_lshl_b64 v[2:3], v[2:3], 2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_shl_v2i64_zext_v2i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1
-; GFX8-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
-; GFX8-NEXT: v_mov_b32_e32 v3, v1
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: v_shl_v2i64_zext_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1
-; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
-; GFX9-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_shl_v2i64_zext_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
+; GCN-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v1
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl_v2i64_zext_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1
; GFX10-NEXT: v_mov_b32_e32 v3, 0
-; GFX10-NEXT: v_and_b32_e32 v2, 0x3fffffff, v0
-; GFX10-NEXT: v_and_b32_e32 v4, 0x3fffffff, v1
-; GFX10-NEXT: v_mov_b32_e32 v5, v3
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3]
-; GFX10-NEXT: v_lshlrev_b64 v[2:3], 2, v[4:5]
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_shl_v2i64_zext_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v2, 0x3fffffff, v0
-; GFX11-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_and_b32 v4, 0x3fffffff, v1
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3]
-; GFX11-NEXT: v_lshlrev_b64 v[2:3], 2, v[4:5]
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1
+; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%and = and <2 x i32> %x, <i32 1073741823, i32 1073741823>
%ext = zext <2 x i32> %and to <2 x i64>
@@ -527,12 +501,10 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_sext_v2i32(<2 x i32> inreg %x) {
; GCN-NEXT: s_brev_b32 s2, -8
; GCN-NEXT: s_mov_b32 s3, s2
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GCN-NEXT: s_ashr_i32 s3, s0, 31
-; GCN-NEXT: s_mov_b32 s2, s0
-; GCN-NEXT: s_ashr_i32 s5, s1, 31
-; GCN-NEXT: s_mov_b32 s4, s1
-; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
-; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 2
+; GCN-NEXT: s_lshl_b32 s0, s0, 2
+; GCN-NEXT: s_lshl_b32 s2, s1, 2
+; GCN-NEXT: s_mov_b32 s1, 0
+; GCN-NEXT: s_mov_b32 s3, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i64_sext_v2i32:
@@ -540,12 +512,10 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_sext_v2i32(<2 x i32> inreg %x) {
; GFX10PLUS-NEXT: s_brev_b32 s2, -8
; GFX10PLUS-NEXT: s_mov_b32 s3, s2
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
-; GFX10PLUS-NEXT: s_ashr_i32 s3, s0, 31
-; GFX10PLUS-NEXT: s_mov_b32 s2, s0
-; GFX10PLUS-NEXT: s_ashr_i32 s5, s1, 31
-; GFX10PLUS-NEXT: s_mov_b32 s4, s1
-; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[2:3], 2
-; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 2
+; GFX10PLUS-NEXT: s_mov_b32 s3, 0
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 2
+; GFX10PLUS-NEXT: s_lshl_b32 s2, s1, 2
+; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%and = and <2 x i32> %x, <i32 536870911, i32 536870911>
%ext = sext <2 x i32> %and to <2 x i64>
@@ -554,50 +524,38 @@ define amdgpu_ps <2 x i64> @s_shl_v2i64_sext_v2i32(<2 x i32> inreg %x) {
}
define <2 x i64> @v_shl_v2i64_sext_v2i32(<2 x i32> %x) {
-; GFX7-LABEL: v_shl_v2i64_sext_v2i32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
-; GFX7-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1
-; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
-; GFX7-NEXT: v_lshl_b64 v[2:3], v[2:3], 2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: v_shl_v2i64_sext_v2i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
-; GFX8-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1
-; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
-; GFX8-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
-; GFX8-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: v_shl_v2i64_sext_v2i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
+; GCN-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v1
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_shl_v2i64_sext_v2i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
-; GFX9-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1
-; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
-; GFX9-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_shl_v2i64_sext_v2i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1
+; GFX10-NEXT: v_mov_b32_e32 v3, 0
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10PLUS-LABEL: v_shl_v2i64_sext_v2i32:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
-; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1
-; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
-; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_shl_v2i64_sext_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1
+; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%and = and <2 x i32> %x, <i32 536870911, i32 536870911>
%ext = sext <2 x i32> %and to <2 x i64>
%shl = shl <2 x i64> %ext, <i64 2, i64 2>
@@ -680,9 +638,10 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_and_b32 s0, s0, 0x3fff3fff
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
-; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s0, s0, 2
; GFX7-NEXT: s_lshl_b32 s1, s1, 2
+; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_v2i32_zext_v2i16:
@@ -700,18 +659,22 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff3fff
; GFX9-NEXT: s_lshr_b32 s1, s0, 16
-; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
-; GFX9-NEXT: s_lshl_b32 s0, s0, 2
+; GFX9-NEXT: s_lshl_b32 s0, s0, 0x20002
; GFX9-NEXT: s_lshl_b32 s1, s1, 2
+; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
+; GFX9-NEXT: s_lshr_b32 s1, s0, 16
+; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i32_zext_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0x3fff3fff
-; GFX10PLUS-NEXT: s_and_b32 s1, s0, 0xffff
-; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s1, 2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 2
+; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x20002
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 2
+; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s0, s1
+; GFX10PLUS-NEXT: s_and_b32 s0, s1, 0xffff
+; GFX10PLUS-NEXT: s_lshr_b32 s1, s1, 16
; GFX10PLUS-NEXT: ; return to shader part epilog
%and = and <2 x i16> %x, <i16 16383, i16 16383>
%ext = zext <2 x i16> %and to <2 x i32>
@@ -733,9 +696,10 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v1
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v2i32_zext_v2i16:
@@ -743,39 +707,28 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 2
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_lshlrev_b16_e32 v0, 2, v1
+; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i32_zext_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 2
-; GFX9-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0
-; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
+; GFX9-NEXT: v_pk_lshlrev_b16 v0, 2, v0 op_sel_hi:[0,1]
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_shl_v2i32_zext_v2i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0
-; GFX10-NEXT: s_mov_b32 s4, 2
-; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_shl_v2i32_zext_v2i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
-; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v1
-; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v2
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10PLUS-LABEL: v_shl_v2i32_zext_v2i16:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0
+; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 2, v0 op_sel_hi:[0,1]
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%and = and <2 x i16> %x, <i16 16383, i16 16383>
%ext = zext <2 x i16> %and to <2 x i32>
%shl = shl <2 x i32> %ext, <i32 2, i32 2>
More information about the llvm-commits
mailing list