[llvm] [RISCV] Optimize (slli (srli (slli X, C1), C1), C2) -> (srli (slli X, C1), C1-C2) (PR #119567)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 07:06:24 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Piotr Fusik (pfusik)
Changes:
---
Patch is 72.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119567.diff
6 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+18-2)
- (added) llvm/test/CodeGen/RISCV/and-shl.ll (+22)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll (+84-92)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+254-262)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+232-240)
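
Before the diff, it may help to see the bit identity behind the title and the new comment in the patch: masking a value with a low mask (no trailing zeros) and then shifting left is equivalent to shifting the value all the way up and shifting it back down by a smaller amount, which is the slli/srli pair the patch selects. A minimal standalone C++ sketch, not part of the patch, with constants mirroring the new `and_0xfff_shl_2` test on RV64 (XLen = 64 assumed):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 0x123456789abcdef0ULL;
  const uint64_t C2 = 0xfff;              // low mask, no trailing zeros
  const unsigned ShAmt = 2;               // the shl amount C
  const unsigned LeadingZeros = 64 - 12;  // C4: 52 leading zeros in C2

  // (shl (and X, C2), C)
  const uint64_t andThenShl = (X & C2) << ShAmt;
  // (srli (slli X, C4), C4 - C): the slli discards exactly the bits the mask
  // would have cleared; the srli then lands the field at bit position C.
  const uint64_t shiftPair = (X << LeadingZeros) >> (LeadingZeros - ShAmt);

  assert(andThenShl == shiftPair);
  return 0;
}
```

The same identity holds on RV32 with LeadingZeros = 20, giving the `slli 20` / `srli 18` pair in the RV32I check lines of the new test.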
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c5432619a36462..4490e1b4c035cd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1026,13 +1026,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned ShAmt = N1C->getZExtValue();
uint64_t Mask = N0.getConstantOperandVal(1);
- // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
- // 32 leading zeros and C3 trailing zeros.
if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
unsigned XLen = Subtarget->getXLen();
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
unsigned TrailingZeros = llvm::countr_zero(Mask);
if (TrailingZeros > 0 && LeadingZeros == 32) {
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
+ // where C2 has 32 leading zeros and C3 trailing zeros.
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, N0->getOperand(0),
CurDAG->getTargetConstant(TrailingZeros, DL, VT));
@@ -1042,6 +1042,22 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, SLLI);
return;
}
+ else if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
+ XLen - LeadingZeros > 11 && LeadingZeros != 32) {
+ // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
+ // where C2 has C4 leading zeros and no trailing zeros.
+ // This is profitable if the "and" was to be lowered to
+ // (srli (slli X, C4), C4) and not (andi X, C2).
+ // For "LeadingZeros == 32" we prefer Zba (slli.uw X, C).
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(LeadingZeros, DL, VT));
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
+ ReplaceNode(Node, SRLI);
+ return;
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/RISCV/and-shl.ll b/llvm/test/CodeGen/RISCV/and-shl.ll
new file mode 100644
index 00000000000000..754df62fb4307e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/and-shl.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+define i32 @and_0xfff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a0, a0, 18
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xfff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 52
+; RV64I-NEXT: srli a0, a0, 50
+; RV64I-NEXT: ret
+ %a = and i32 %x, 4095
+ %s = shl i32 %a, 2
+ ret i32 %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 7f4483a8f77d9c..ddcb3c3121bc3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 8
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 17
+; ZVFH32-NEXT: srli a1, a1, 17
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 17
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 2
+; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2si_v3f32_v3i15:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 8
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 49
+; ZVFH64-NEXT: vmv.x.s a2, v8
; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 49
+; ZVFH64-NEXT: slli a2, a2, 49
; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 34
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
; ZVFH64-NEXT: slli a2, a1, 19
; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
@@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 8
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 17
+; ZVFHMIN32-NEXT: srli a1, a1, 17
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 17
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 2
+; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 8
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 49
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 49
+; ZVFHMIN64-NEXT: slli a2, a2, 49
; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 34
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
; ZVFHMIN64-NEXT: slli a2, a1, 19
; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
@@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFH32: # %bb.0:
; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 16
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 16
+; ZVFH32-NEXT: srli a1, a1, 16
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 16
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 1
+; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
; ZVFH64: # %bb.0:
; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 16
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 48
+; ZVFH64-NEXT: vmv.x.s a2, v8
; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 48
+; ZVFH64-NEXT: slli a2, a2, 48
; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 33
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
; ZVFH64-NEXT: slli a2, a1, 19
; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
@@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32: # %bb.0:
; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 16
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 16
+; ZVFHMIN32-NEXT: srli a1, a1, 16
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 16
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 1
+; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
; ZVFHMIN64: # %bb.0:
; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 16
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 48
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 48
+; ZVFHMIN64-NEXT: slli a2, a2, 48
; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 33
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
; ZVFHMIN64-NEXT: slli a2, a1, 19
; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e9fd0a19e3eb66..139f7b4e6a0c80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3296,11 +3296,11 @@ define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
; RVA22U64-LABEL: buildvec_v4i16_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a3, a3, 48
-; RVA22U64-NEXT: zext.h a2, a2
+; RVA22U64-NEXT: slli a2, a2, 48
; RVA22U64-NEXT: zext.h a0, a0
-; RVA22U64-NEXT: zext.h a1, a1
-; RVA22U64-NEXT: slli a2, a2, 32
-; RVA22U64-NEXT: slli a1, a1, 16
+; RVA22U64-NEXT: slli a1, a1, 48
+; RVA22U64-NEXT: srli a2, a2, 16
+; RVA22U64-NEXT: srli a1, a1, 32
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 141d54cf585f28..c6e12c52122d27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -3205,88 +3205,86 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a3
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB40_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a3
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_14
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_15
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_15
; RV64ZVE32F-NEXT: .LBB40_6: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB40_16
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
; RV64ZVE32F-NEXT: .LBB40_7: # %else11
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
; RV64ZVE32F-NEXT: .LBB40_9: # %else14
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_11
+; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: .LBB40_11: # %else17
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB40_13
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB40_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
@@ -3298,44 +3296,44 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: lw a3, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE...
[truncated]
``````````
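
Tying the new guard in RISCVISelDAGToDAG.cpp back to the `and_0xfff_shl_2` test: for `and i32 %x, 4095` followed by a shift left by 2 on RV64, the mask 0xfff has LeadingZeros = 52 and TrailingZeros = 0, so the new branch fires and selects `slli a0, a0, 52` / `srli a0, a0, 50`, matching the RV64I check lines above. Below is a rough C++20 sketch of that profitability condition, not the patch itself; it assumes XLen = 64 and omits the surrounding checks (ShAmt <= 32 and the shifted-mask test) that the enclosing code already performs:

```cpp
#include <bit>      // std::countl_zero, std::countr_zero (C++20)
#include <cstdint>

// True when lowering (shl (and X, Mask), ShAmt) as a slli/srli pair should
// beat the alternatives: the mask has no trailing zeros, it is too wide for
// a 12-bit ANDI immediate (more than 11 significant bits), and the
// LeadingZeros == 32 case is left to Zba's slli.uw instead.
static bool preferShiftPair(uint64_t Mask, unsigned ShAmt) {
  const unsigned XLen = 64;  // assumption: RV64
  const unsigned LeadingZeros = std::countl_zero(Mask);
  const unsigned TrailingZeros = std::countr_zero(Mask);
  return TrailingZeros == 0 && LeadingZeros > ShAmt &&
         XLen - LeadingZeros > 11 && LeadingZeros != 32;
}

int main() {
  // Mirrors and_0xfff_shl_2 on RV64: 0xfff needs 12 bits, so the shift pair
  // (slli 52, srli 50) is preferred over andi + slli.
  return preferShiftPair(0xfff, 2) ? 0 : 1;
}
```

The `XLen - LeadingZeros > 11` term is what keeps small masks on the single-instruction `andi` path: a mask of up to 11 significant bits still fits ANDI's 12-bit signed immediate.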
https://github.com/llvm/llvm-project/pull/119567