[llvm] b7a305d - [AArch64][GlobalISel] Optimise Combine Funnel Shift
Tuan Chuong Goh via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 04:11:48 PDT 2023
Author: Tuan Chuong Goh
Date: 2023-09-07T11:58:12+01:00
New Revision: b7a305deca809e281e7b28a5714d708906821773
URL: https://github.com/llvm/llvm-project/commit/b7a305deca809e281e7b28a5714d708906821773
DIFF: https://github.com/llvm/llvm-project/commit/b7a305deca809e281e7b28a5714d708906821773.diff
LOG: [AArch64][GlobalISel] Optimise Combine Funnel Shift
Combine any funnel shift with a shift amount of 0 to a copy.
Modulo is applied to shift amount if it is larger than the
instruction's bitwidth.
Differential Revision: https://reviews.llvm.org/D157591
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir
llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AArch64/funnel-shift.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b464c7e1a19b99..bb7b8654045d7ff 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -451,6 +451,10 @@ class CombinerHelper {
/// Delete \p MI and replace all of its uses with \p Replacement.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+ /// @brief Replaces the shift amount in \p MI with ShiftAmt % BW
+ /// @param MI
+ void applyFunnelShiftConstantModulo(MachineInstr &MI);
+
/// Return true if \p MOP1 and \p MOP2 are register operands are defined by
/// equivalent instructions.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
@@ -463,6 +467,10 @@ class CombinerHelper {
/// equal to \p C.
bool matchConstantFPOp(const MachineOperand &MOP, double C);
+ /// @brief Checks if constant at \p ConstIdx is larger than \p MI 's bitwidth
+ /// @param ConstIdx Index of the constant
+ bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx);
+
/// Optimize (cond ? x : x) -> x
bool matchSelectSameVal(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index fd73a5995355f57..c97658bc9c25c6d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -855,6 +855,28 @@ def funnel_shift_to_rotate : GICombineRule<
(apply [{ Helper.applyFunnelShiftToRotate(*${root}); }])
>;
+// Fold fshr x, y, 0 -> y
+def funnel_shift_right_zero: GICombineRule<
+ (defs root:$root),
+ (match (G_FSHR $x, $y, $z, 0):$root),
+ (apply (COPY $x, $z))
+>;
+
+// Fold fshl x, y, 0 -> x
+def funnel_shift_left_zero: GICombineRule<
+ (defs root:$root),
+ (match (G_FSHL $x, $y, $z, 0):$root),
+ (apply (COPY $x, $y))
+>;
+
+// Fold fsh(l/r) x, y, C -> fsh(l/r) x, y, C % bw
+def funnel_shift_overshift: GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_FSHL, G_FSHR):$root,
+ [{ return Helper.matchConstantLargerBitWidth(*${root}, 3); }]),
+ (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
+>;
+
def rotate_out_of_range : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -893,7 +915,10 @@ def bitfield_extract_from_and : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
- funnel_shift_to_rotate]>;
+ funnel_shift_to_rotate,
+ funnel_shift_right_zero,
+ funnel_shift_left_zero,
+ funnel_shift_overshift]>;
def bitfield_extract_from_sext_inreg : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 836a79607cef87d..1225eb11f10ede4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2625,6 +2625,45 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
replaceRegWith(MRI, OldReg, Replacement);
}
+bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
+ unsigned ConstIdx) {
+ Register ConstReg = MI.getOperand(ConstIdx).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // Get the shift amount
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ if (!VRegAndVal)
+ return false;
+
+ // Return true of shift amount >= Bitwidth
+ return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
+}
+
+void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
+ MI.getOpcode() == TargetOpcode::G_FSHR) &&
+ "This is not a funnel shift operation");
+
+ Register ConstReg = MI.getOperand(3).getReg();
+ LLT ConstTy = MRI.getType(ConstReg);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
+ assert((VRegAndVal) && "Value is not a constant");
+
+ // Calculate the new Shift Amount = Old Shift Amount % BitWidth
+ APInt NewConst = VRegAndVal->Value.urem(
+ APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
+ Builder.buildInstr(
+ MI.getOpcode(), {MI.getOperand(0)},
+ {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
+
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir
new file mode 100644
index 000000000000000..176cf88ed5a96ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir
@@ -0,0 +1,452 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: fshl_i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshl_i8
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %3:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %3(s32)
+ %4:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $w2
+ %2:_(s8) = G_TRUNC %5(s32)
+ %6:_(s8) = G_FSHL %0, %1, %2(s8)
+ %7:_(s32) = G_ANYEXT %6(s8)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshl_i16
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %3:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %3(s32)
+ %4:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $w2
+ %2:_(s16) = G_TRUNC %5(s32)
+ %6:_(s16) = G_FSHL %0, %1, %2(s16)
+ %7:_(s32) = G_ANYEXT %6(s16)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshl_i32
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = G_FSHL %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i64
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: fshl_i64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = G_FSHL %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshl_i8_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i8_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 5
+ %5:_(s8) = G_FSHL %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i8_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i8_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 10
+ %5:_(s8) = G_FSHL %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i8_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i8_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 8
+ %5:_(s8) = G_FSHL %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i16_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i16_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 5
+ %5:_(s16) = G_FSHL %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i16_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i16_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 20
+ %5:_(s16) = G_FSHL %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i16_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i16_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 16
+ %5:_(s16) = G_FSHL %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i32_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i32_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 5
+ %3:_(s32) = G_FSHL %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i32_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i32_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 42
+ %3:_(s32) = G_FSHL %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i32_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshl_i32_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 32
+ %3:_(s32) = G_FSHL %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshl_i64_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshl_i64_const_shift
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 5
+ %3:_(s64) = G_FSHL %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshl_i64_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshl_i64_const_overshift
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 72
+ %3:_(s64) = G_FSHL %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshl_i64_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshl_i64_shift_by_bidwidth
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 64
+ %3:_(s64) = G_FSHL %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir
new file mode 100644
index 000000000000000..416a9a444a0b44d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir
@@ -0,0 +1,452 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
+
+---
+name: fshr_i8
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshr_i8
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %3:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %3(s32)
+ %4:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $w2
+ %2:_(s8) = G_TRUNC %5(s32)
+ %6:_(s8) = G_FSHR %0, %1, %2(s8)
+ %7:_(s32) = G_ANYEXT %6(s8)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i16
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshr_i16
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %3:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %3(s32)
+ %4:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $w2
+ %2:_(s16) = G_TRUNC %5(s32)
+ %6:_(s16) = G_FSHR %0, %1, %2(s16)
+ %7:_(s32) = G_ANYEXT %6(s16)
+ $w0 = COPY %7(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i32
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+
+ ; CHECK-LABEL: name: fshr_i32
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = G_FSHR %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i64
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+
+ ; CHECK-LABEL: name: fshr_i64
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = COPY $x2
+ %3:_(s64) = G_FSHR %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshr_i8_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i8_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 5
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 5
+ %5:_(s8) = G_FSHR %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i8_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i8_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 10
+ %5:_(s8) = G_FSHR %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i8_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i8_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s8) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s8) = G_TRUNC %3(s32)
+ %4:_(s8) = G_CONSTANT i8 8
+ %5:_(s8) = G_FSHR %0, %1, %4(s8)
+ %6:_(s32) = G_ANYEXT %5(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i16_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i16_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 5
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 5
+ %5:_(s16) = G_FSHR %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i16_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i16_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 20
+ %5:_(s16) = G_FSHR %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i16_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i16_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:_(s32) = COPY $w0
+ %0:_(s16) = G_TRUNC %2(s32)
+ %3:_(s32) = COPY $w1
+ %1:_(s16) = G_TRUNC %3(s32)
+ %4:_(s16) = G_CONSTANT i16 16
+ %5:_(s16) = G_FSHR %0, %1, %4(s16)
+ %6:_(s32) = G_ANYEXT %5(s16)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i32_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i32_const_shift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 5
+ %3:_(s32) = G_FSHR %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i32_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i32_const_overshift
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32)
+ ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 42
+ %3:_(s32) = G_FSHR %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i32_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: fshr_i32_shift_by_bidwidth
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = G_CONSTANT i32 32
+ %3:_(s32) = G_FSHR %0, %1, %2(s32)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
+...
+
+---
+name: fshr_i64_const_shift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshr_i64_const_shift
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 5
+ %3:_(s64) = G_FSHR %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshr_i64_const_overshift
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshr_i64_const_overshift
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[C]](s64)
+ ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 72
+ %3:_(s64) = G_FSHR %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+---
+name: fshr_i64_shift_by_bidwidth
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: fshr_i64_shift_by_bidwidth
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1
+ ; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %0:_(s64) = COPY $x0
+ %1:_(s64) = COPY $x1
+ %2:_(s64) = G_CONSTANT i64 64
+ %3:_(s64) = G_FSHR %0, %1, %2(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR implicit $x0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index bdc7de9e853f401..20a6dd0899b40aa 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -394,15 +394,10 @@ define i8 @fshr_i8_const_fold_overshift_2() {
}
define i8 @fshr_i8_const_fold_overshift_3() {
-; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_3:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w0, #255 // =0xff
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_3:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov w0, #-1 // =0xffffffff
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, #255 // =0xff
+; CHECK-NEXT: ret
%f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
ret i8 %f
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index 381bb724b091bc8..139bb40daa930a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -2790,33 +2790,33 @@ define amdgpu_ps i32 @s_fshl_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_fshl_i32_5:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
-; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -5
+; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 27
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_i32_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -5
+; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 27
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshl_i32_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -5
+; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 27
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshl_i32_5:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -5
+; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 27
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshl_i32_5:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, -5
+; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 27
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
@@ -2828,33 +2828,33 @@ define amdgpu_ps i32 @s_fshl_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
; GFX6-LABEL: s_fshl_i32_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
-; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -8
+; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 24
; GFX6-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_i32_8:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -8
+; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 24
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshl_i32_8:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -8
+; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 24
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshl_i32_8:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -8
+; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 24
; GFX10-NEXT: v_readfirstlane_b32 s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshl_i32_8:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, -8
+; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 24
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
@@ -2889,13 +2889,13 @@ define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) {
; GCN-LABEL: v_fshl_i32_5:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -5
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 27
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i32_5:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -5
+; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 27
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
ret i32 %result
@@ -2905,13 +2905,13 @@ define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) {
; GCN-LABEL: v_fshl_i32_8:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -8
+; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 24
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fshl_i32_8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -8
+; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 24
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
ret i32 %result
More information about the llvm-commits
mailing list