[llvm] [RISCV] Optimize (slli (srli (slli X, C1), C1), C2) -> (srli (slli X, C1), C1-C2) (PR #119567)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 11 07:06:24 PST 2024


llvmbot wrote:


@llvm/pr-subscribers-backend-risc-v

Author: Piotr Fusik (pfusik)




---

Patch is 72.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119567.diff


6 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+18-2) 
- (added) llvm/test/CodeGen/RISCV/and-shl.ll (+22) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll (+84-92) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+254-262) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+232-240) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index c5432619a36462..4490e1b4c035cd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1026,13 +1026,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     unsigned ShAmt = N1C->getZExtValue();
     uint64_t Mask = N0.getConstantOperandVal(1);
 
-    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
-    // 32 leading zeros and C3 trailing zeros.
     if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
       unsigned XLen = Subtarget->getXLen();
       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
       unsigned TrailingZeros = llvm::countr_zero(Mask);
       if (TrailingZeros > 0 && LeadingZeros == 32) {
+        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
+        // where C2 has 32 leading zeros and C3 trailing zeros.
         SDNode *SRLIW = CurDAG->getMachineNode(
             RISCV::SRLIW, DL, VT, N0->getOperand(0),
             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
@@ -1042,6 +1042,22 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         ReplaceNode(Node, SLLI);
         return;
       }
+      else if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
+          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
+        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
+        // where C2 has C4 leading zeros and no trailing zeros.
+        // This is profitable if the "and" was to be lowered to
+        // (srli (slli X, C4), C4) and not (andi X, C2).
+        // For "LeadingZeros == 32" we prefer Zba (slli.uw X, C).
+        SDNode *SLLI = CurDAG->getMachineNode(
+          RISCV::SLLI, DL, VT, N0->getOperand(0),
+          CurDAG->getTargetConstant(LeadingZeros, DL, VT));
+        SDNode *SRLI = CurDAG->getMachineNode(
+          RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+          CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
+        ReplaceNode(Node, SRLI);
+        return;
+      }
     }
     break;
   }
diff --git a/llvm/test/CodeGen/RISCV/and-shl.ll b/llvm/test/CodeGen/RISCV/and-shl.ll
new file mode 100644
index 00000000000000..754df62fb4307e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/and-shl.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+define i32 @and_0xfff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 20
+; RV32I-NEXT:    srli a0, a0, 18
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: and_0xfff_shl_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 52
+; RV64I-NEXT:    srli a0, a0, 50
+; RV64I-NEXT:    ret
+  %a = and i32 %x, 4095
+  %s = shl i32 %a, 2
+  ret i32 %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 7f4483a8f77d9c..ddcb3c3121bc3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32:       # %bb.0:
 ; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT:    lui a1, 8
 ; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT:    vmv.x.s a2, v9
-; ZVFH32-NEXT:    addi a1, a1, -1
+; ZVFH32-NEXT:    vmv.x.s a1, v9
 ; ZVFH32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT:    vmv.x.s a3, v8
-; ZVFH32-NEXT:    and a2, a2, a1
-; ZVFH32-NEXT:    vmv.x.s a4, v9
-; ZVFH32-NEXT:    and a1, a4, a1
-; ZVFH32-NEXT:    slli a4, a3, 17
-; ZVFH32-NEXT:    slli a3, a3, 30
-; ZVFH32-NEXT:    srli a4, a4, 19
-; ZVFH32-NEXT:    slli a1, a1, 15
-; ZVFH32-NEXT:    or a2, a2, a3
-; ZVFH32-NEXT:    or a1, a2, a1
+; ZVFH32-NEXT:    vmv.x.s a2, v8
+; ZVFH32-NEXT:    slli a1, a1, 17
+; ZVFH32-NEXT:    srli a1, a1, 17
+; ZVFH32-NEXT:    slli a3, a2, 30
+; ZVFH32-NEXT:    or a1, a1, a3
+; ZVFH32-NEXT:    vmv.x.s a3, v9
+; ZVFH32-NEXT:    slli a2, a2, 17
+; ZVFH32-NEXT:    slli a3, a3, 17
+; ZVFH32-NEXT:    srli a2, a2, 19
+; ZVFH32-NEXT:    srli a3, a3, 2
+; ZVFH32-NEXT:    or a1, a1, a3
 ; ZVFH32-NEXT:    sw a1, 0(a0)
-; ZVFH32-NEXT:    sh a4, 4(a0)
+; ZVFH32-NEXT:    sh a2, 4(a0)
 ; ZVFH32-NEXT:    ret
 ;
 ; ZVFH64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFH64:       # %bb.0:
 ; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT:    lui a1, 8
-; ZVFH64-NEXT:    vmv.x.s a2, v9
-; ZVFH64-NEXT:    addiw a1, a1, -1
+; ZVFH64-NEXT:    vmv.x.s a1, v9
 ; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT:    vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT:    and a2, a2, a1
-; ZVFH64-NEXT:    vmv.x.s a3, v8
-; ZVFH64-NEXT:    and a1, a3, a1
+; ZVFH64-NEXT:    slli a1, a1, 49
+; ZVFH64-NEXT:    vmv.x.s a2, v8
 ; ZVFH64-NEXT:    vmv.x.s a3, v9
+; ZVFH64-NEXT:    srli a1, a1, 49
+; ZVFH64-NEXT:    slli a2, a2, 49
 ; ZVFH64-NEXT:    slli a3, a3, 30
-; ZVFH64-NEXT:    slli a1, a1, 15
-; ZVFH64-NEXT:    or a2, a2, a3
-; ZVFH64-NEXT:    or a1, a2, a1
+; ZVFH64-NEXT:    srli a2, a2, 34
+; ZVFH64-NEXT:    or a1, a1, a3
+; ZVFH64-NEXT:    or a1, a1, a2
 ; ZVFH64-NEXT:    slli a2, a1, 19
 ; ZVFH64-NEXT:    srli a2, a2, 51
 ; ZVFH64-NEXT:    sw a1, 0(a0)
@@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32:       # %bb.0:
 ; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT:    lui a1, 8
 ; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
-; ZVFHMIN32-NEXT:    addi a1, a1, -1
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT:    vmv.x.s a3, v8
-; ZVFHMIN32-NEXT:    and a2, a2, a1
-; ZVFHMIN32-NEXT:    vmv.x.s a4, v9
-; ZVFHMIN32-NEXT:    and a1, a4, a1
-; ZVFHMIN32-NEXT:    slli a4, a3, 17
-; ZVFHMIN32-NEXT:    slli a3, a3, 30
-; ZVFHMIN32-NEXT:    srli a4, a4, 19
-; ZVFHMIN32-NEXT:    slli a1, a1, 15
-; ZVFHMIN32-NEXT:    or a2, a2, a3
-; ZVFHMIN32-NEXT:    or a1, a2, a1
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN32-NEXT:    slli a1, a1, 17
+; ZVFHMIN32-NEXT:    srli a1, a1, 17
+; ZVFHMIN32-NEXT:    slli a3, a2, 30
+; ZVFHMIN32-NEXT:    or a1, a1, a3
+; ZVFHMIN32-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN32-NEXT:    slli a2, a2, 17
+; ZVFHMIN32-NEXT:    slli a3, a3, 17
+; ZVFHMIN32-NEXT:    srli a2, a2, 19
+; ZVFHMIN32-NEXT:    srli a3, a3, 2
+; ZVFHMIN32-NEXT:    or a1, a1, a3
 ; ZVFHMIN32-NEXT:    sw a1, 0(a0)
-; ZVFHMIN32-NEXT:    sh a4, 4(a0)
+; ZVFHMIN32-NEXT:    sh a2, 4(a0)
 ; ZVFHMIN32-NEXT:    ret
 ;
 ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFHMIN64:       # %bb.0:
 ; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT:    lui a1, 8
-; ZVFHMIN64-NEXT:    vmv.x.s a2, v9
-; ZVFHMIN64-NEXT:    addiw a1, a1, -1
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT:    vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT:    and a2, a2, a1
-; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
-; ZVFHMIN64-NEXT:    and a1, a3, a1
+; ZVFHMIN64-NEXT:    slli a1, a1, 49
+; ZVFHMIN64-NEXT:    vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN64-NEXT:    srli a1, a1, 49
+; ZVFHMIN64-NEXT:    slli a2, a2, 49
 ; ZVFHMIN64-NEXT:    slli a3, a3, 30
-; ZVFHMIN64-NEXT:    slli a1, a1, 15
-; ZVFHMIN64-NEXT:    or a2, a2, a3
-; ZVFHMIN64-NEXT:    or a1, a2, a1
+; ZVFHMIN64-NEXT:    srli a2, a2, 34
+; ZVFHMIN64-NEXT:    or a1, a1, a3
+; ZVFHMIN64-NEXT:    or a1, a1, a2
 ; ZVFHMIN64-NEXT:    slli a2, a1, 19
 ; ZVFHMIN64-NEXT:    srli a2, a2, 51
 ; ZVFHMIN64-NEXT:    sw a1, 0(a0)
@@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32:       # %bb.0:
 ; ZVFH32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT:    lui a1, 16
 ; ZVFH32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT:    vmv.x.s a2, v9
-; ZVFH32-NEXT:    addi a1, a1, -1
+; ZVFH32-NEXT:    vmv.x.s a1, v9
 ; ZVFH32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT:    vmv.x.s a3, v8
-; ZVFH32-NEXT:    and a2, a2, a1
-; ZVFH32-NEXT:    vmv.x.s a4, v9
-; ZVFH32-NEXT:    and a1, a4, a1
-; ZVFH32-NEXT:    slli a4, a3, 17
-; ZVFH32-NEXT:    slli a3, a3, 30
-; ZVFH32-NEXT:    srli a4, a4, 19
-; ZVFH32-NEXT:    slli a1, a1, 15
-; ZVFH32-NEXT:    or a2, a2, a3
-; ZVFH32-NEXT:    or a1, a2, a1
+; ZVFH32-NEXT:    vmv.x.s a2, v8
+; ZVFH32-NEXT:    slli a1, a1, 16
+; ZVFH32-NEXT:    srli a1, a1, 16
+; ZVFH32-NEXT:    slli a3, a2, 30
+; ZVFH32-NEXT:    or a1, a1, a3
+; ZVFH32-NEXT:    vmv.x.s a3, v9
+; ZVFH32-NEXT:    slli a2, a2, 17
+; ZVFH32-NEXT:    slli a3, a3, 16
+; ZVFH32-NEXT:    srli a2, a2, 19
+; ZVFH32-NEXT:    srli a3, a3, 1
+; ZVFH32-NEXT:    or a1, a1, a3
 ; ZVFH32-NEXT:    sw a1, 0(a0)
-; ZVFH32-NEXT:    sh a4, 4(a0)
+; ZVFH32-NEXT:    sh a2, 4(a0)
 ; ZVFH32-NEXT:    ret
 ;
 ; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFH64:       # %bb.0:
 ; ZVFH64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT:    lui a1, 16
-; ZVFH64-NEXT:    vmv.x.s a2, v9
-; ZVFH64-NEXT:    addiw a1, a1, -1
+; ZVFH64-NEXT:    vmv.x.s a1, v9
 ; ZVFH64-NEXT:    vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT:    vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT:    and a2, a2, a1
-; ZVFH64-NEXT:    vmv.x.s a3, v8
-; ZVFH64-NEXT:    and a1, a3, a1
+; ZVFH64-NEXT:    slli a1, a1, 48
+; ZVFH64-NEXT:    vmv.x.s a2, v8
 ; ZVFH64-NEXT:    vmv.x.s a3, v9
+; ZVFH64-NEXT:    srli a1, a1, 48
+; ZVFH64-NEXT:    slli a2, a2, 48
 ; ZVFH64-NEXT:    slli a3, a3, 30
-; ZVFH64-NEXT:    slli a1, a1, 15
-; ZVFH64-NEXT:    or a2, a2, a3
-; ZVFH64-NEXT:    or a1, a2, a1
+; ZVFH64-NEXT:    srli a2, a2, 33
+; ZVFH64-NEXT:    or a1, a1, a3
+; ZVFH64-NEXT:    or a1, a1, a2
 ; ZVFH64-NEXT:    slli a2, a1, 19
 ; ZVFH64-NEXT:    srli a2, a2, 51
 ; ZVFH64-NEXT:    sw a1, 0(a0)
@@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32:       # %bb.0:
 ; ZVFHMIN32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT:    lui a1, 16
 ; ZVFHMIN32-NEXT:    vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT:    vmv.x.s a2, v9
-; ZVFHMIN32-NEXT:    addi a1, a1, -1
+; ZVFHMIN32-NEXT:    vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT:    vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT:    vmv.x.s a3, v8
-; ZVFHMIN32-NEXT:    and a2, a2, a1
-; ZVFHMIN32-NEXT:    vmv.x.s a4, v9
-; ZVFHMIN32-NEXT:    and a1, a4, a1
-; ZVFHMIN32-NEXT:    slli a4, a3, 17
-; ZVFHMIN32-NEXT:    slli a3, a3, 30
-; ZVFHMIN32-NEXT:    srli a4, a4, 19
-; ZVFHMIN32-NEXT:    slli a1, a1, 15
-; ZVFHMIN32-NEXT:    or a2, a2, a3
-; ZVFHMIN32-NEXT:    or a1, a2, a1
+; ZVFHMIN32-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN32-NEXT:    slli a1, a1, 16
+; ZVFHMIN32-NEXT:    srli a1, a1, 16
+; ZVFHMIN32-NEXT:    slli a3, a2, 30
+; ZVFHMIN32-NEXT:    or a1, a1, a3
+; ZVFHMIN32-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN32-NEXT:    slli a2, a2, 17
+; ZVFHMIN32-NEXT:    slli a3, a3, 16
+; ZVFHMIN32-NEXT:    srli a2, a2, 19
+; ZVFHMIN32-NEXT:    srli a3, a3, 1
+; ZVFHMIN32-NEXT:    or a1, a1, a3
 ; ZVFHMIN32-NEXT:    sw a1, 0(a0)
-; ZVFHMIN32-NEXT:    sh a4, 4(a0)
+; ZVFHMIN32-NEXT:    sh a2, 4(a0)
 ; ZVFHMIN32-NEXT:    ret
 ;
 ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFHMIN64:       # %bb.0:
 ; ZVFHMIN64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT:    lui a1, 16
-; ZVFHMIN64-NEXT:    vmv.x.s a2, v9
-; ZVFHMIN64-NEXT:    addiw a1, a1, -1
+; ZVFHMIN64-NEXT:    vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT:    vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT:    vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT:    and a2, a2, a1
-; ZVFHMIN64-NEXT:    vmv.x.s a3, v8
-; ZVFHMIN64-NEXT:    and a1, a3, a1
+; ZVFHMIN64-NEXT:    slli a1, a1, 48
+; ZVFHMIN64-NEXT:    vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT:    vmv.x.s a3, v9
+; ZVFHMIN64-NEXT:    srli a1, a1, 48
+; ZVFHMIN64-NEXT:    slli a2, a2, 48
 ; ZVFHMIN64-NEXT:    slli a3, a3, 30
-; ZVFHMIN64-NEXT:    slli a1, a1, 15
-; ZVFHMIN64-NEXT:    or a2, a2, a3
-; ZVFHMIN64-NEXT:    or a1, a2, a1
+; ZVFHMIN64-NEXT:    srli a2, a2, 33
+; ZVFHMIN64-NEXT:    or a1, a1, a3
+; ZVFHMIN64-NEXT:    or a1, a1, a2
 ; ZVFHMIN64-NEXT:    slli a2, a1, 19
 ; ZVFHMIN64-NEXT:    srli a2, a2, 51
 ; ZVFHMIN64-NEXT:    sw a1, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e9fd0a19e3eb66..139f7b4e6a0c80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -3296,11 +3296,11 @@ define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
 ; RVA22U64-LABEL: buildvec_v4i16_pack:
 ; RVA22U64:       # %bb.0:
 ; RVA22U64-NEXT:    slli a3, a3, 48
-; RVA22U64-NEXT:    zext.h a2, a2
+; RVA22U64-NEXT:    slli a2, a2, 48
 ; RVA22U64-NEXT:    zext.h a0, a0
-; RVA22U64-NEXT:    zext.h a1, a1
-; RVA22U64-NEXT:    slli a2, a2, 32
-; RVA22U64-NEXT:    slli a1, a1, 16
+; RVA22U64-NEXT:    slli a1, a1, 48
+; RVA22U64-NEXT:    srli a2, a2, 16
+; RVA22U64-NEXT:    srli a1, a1, 32
 ; RVA22U64-NEXT:    or a2, a2, a3
 ; RVA22U64-NEXT:    or a0, a0, a1
 ; RVA22U64-NEXT:    or a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 141d54cf585f28..c6e12c52122d27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -3205,88 +3205,86 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ;
 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    lui a1, 16
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
-; RV64ZVE32F-NEXT:    andi a3, a2, 1
-; RV64ZVE32F-NEXT:    addiw a1, a1, -1
-; RV64ZVE32F-NEXT:    beqz a3, .LBB40_2
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
+; RV64ZVE32F-NEXT:    andi a2, a1, 1
+; RV64ZVE32F-NEXT:    beqz a2, .LBB40_2
 ; RV64ZVE32F-NEXT:  # %bb.1: # %cond.load
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
-; RV64ZVE32F-NEXT:    and a3, a3, a1
-; RV64ZVE32F-NEXT:    slli a3, a3, 2
-; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    lw a3, 0(a3)
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 48
+; RV64ZVE32F-NEXT:    srli a2, a2, 46
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v10, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT:  .LBB40_2: # %else
-; RV64ZVE32F-NEXT:    andi a3, a2, 2
-; RV64ZVE32F-NEXT:    beqz a3, .LBB40_4
+; RV64ZVE32F-NEXT:    andi a2, a1, 2
+; RV64ZVE32F-NEXT:    beqz a2, .LBB40_4
 ; RV64ZVE32F-NEXT:  # %bb.3: # %cond.load1
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT:    vmv.x.s a3, v9
-; RV64ZVE32F-NEXT:    and a3, a3, a1
-; RV64ZVE32F-NEXT:    slli a3, a3, 2
-; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    lw a3, 0(a3)
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
+; RV64ZVE32F-NEXT:    slli a2, a2, 48
+; RV64ZVE32F-NEXT:    srli a2, a2, 46
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v9, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v9, 1
 ; RV64ZVE32F-NEXT:  .LBB40_4: # %else2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT:    andi a3, a2, 4
+; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT:    bnez a3, .LBB40_14
+; RV64ZVE32F-NEXT:    bnez a2, .LBB40_14
 ; RV64ZVE32F-NEXT:  # %bb.5: # %else5
-; RV64ZVE32F-NEXT:    andi a3, a2, 8
-; RV64ZVE32F-NEXT:    bnez a3, .LBB40_15
+; RV64ZVE32F-NEXT:    andi a2, a1, 8
+; RV64ZVE32F-NEXT:    bnez a2, .LBB40_15
 ; RV64ZVE32F-NEXT:  .LBB40_6: # %else8
-; RV64ZVE32F-NEXT:    andi a3, a2, 16
-; RV64ZVE32F-NEXT:    bnez a3, .LBB40_16
+; RV64ZVE32F-NEXT:    andi a2, a1, 16
+; RV64ZVE32F-NEXT:    bnez a2, .LBB40_16
 ; RV64ZVE32F-NEXT:  .LBB40_7: # %else11
-; RV64ZVE32F-NEXT:    andi a3, a2, 32
-; RV64ZVE32F-NEXT:    beqz a3, .LBB40_9
+; RV64ZVE32F-NEXT:    andi a2, a1, 32
+; RV64ZVE32F-NEXT:    beqz a2, .LBB40_9
 ; RV64ZVE32F-NEXT:  .LBB40_8: # %cond.load13
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
-; RV64ZVE32F-NEXT:    and a3, a3, a1
-; RV64ZVE32F-NEXT:    slli a3, a3, 2
-; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    lw a3, 0(a3)
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 48
+; RV64ZVE32F-NEXT:    srli a2, a2, 46
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 5
 ; RV64ZVE32F-NEXT:  .LBB40_9: # %else14
-; RV64ZVE32F-NEXT:    andi a3, a2, 64
+; RV64ZVE32F-NEXT:    andi a2, a1, 64
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT:    beqz a3, .LBB40_11
+; RV64ZVE32F-NEXT:    beqz a2, .LBB40_11
 ; RV64ZVE32F-NEXT:  # %bb.10: # %cond.load16
-; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
-; RV64ZVE32F-NEXT:    and a3, a3, a1
-; RV64ZVE32F-NEXT:    slli a3, a3, 2
-; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    lw a3, 0(a3)
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 48
+; RV64ZVE32F-NEXT:    srli a2, a2, 46
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 6
 ; RV64ZVE32F-NEXT:  .LBB40_11: # %else17
-; RV64ZVE32F-NEXT:    andi a2, a2, -128
-; RV64ZVE32F-NEXT:    beqz a2, .LBB40_13
+; RV64ZVE32F-NEXT:    andi a1, a1, -128
+; RV64ZVE32F-NEXT:    beqz a1, .LBB40_13
 ; RV64ZVE32F-NEXT:  # %bb.12: # %cond.load19
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
-; RV64ZVE32F-NEXT:    and a1, a2, a1
-; RV64ZVE32F-NEXT:    slli a1, a1, 2
+; RV64ZVE32F-NEXT:    vmv.x.s a1, v8
+; RV64ZVE32F-NEXT:    slli a1, a1, 48
+; RV64ZVE32F-NEXT:    srli a1, a1, 46
 ; RV64ZVE32F-NEXT:    add a0, a0, a1
 ; RV64ZVE32F-NEXT:    lw a0, 0(a0)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
@@ -3298,44 +3296,44 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
 ; RV64ZVE32F-NEXT:    vmv2r.v v8, v10
 ; RV64ZVE32F-NEXT:    ret
 ; RV64ZVE32F-NEXT:  .LBB40_14: # %cond.load4
-; RV64ZVE32F-NEXT:    vmv.x.s a3, v8
-; RV64ZVE32F-NEXT:    and a3, a3, a1
-; RV64ZVE32F-NEXT:    slli a3, a3, 2
-; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    lw a3, 0(a3)
+; RV64ZVE32F-NEXT:    vmv.x.s a2, v8
+; RV64ZVE32F-NEXT:    slli a2, a2, 48
+; RV64ZVE32F-NEXT:    srli a2, a2, 46
+; RV64ZVE32F-NEXT:    add a2, a0, a2
+; RV64ZVE32F-NEXT:    lw a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v12, a3
+; RV64ZVE32F-NEXT:    vmv.s.x v12, a2
 ; RV64ZVE32F-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vslideup.vi v10, v12, 2
-; RV64ZVE32F-NEXT:    andi a3, a2, 8
-; RV64ZVE32F-NEXT:    beqz a3, .LBB40_6
+; RV64ZVE32F-NEXT:    andi a2, a1, 8
+; RV64ZVE...
[truncated]

``````````
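
In short, the new ISel case fires when the AND mask is a mask of low bits that is too wide for a 12-bit ANDI immediate and has no trailing zeros, so the AND would already have been lowered as slli+srli; folding the shl into the srli then saves one shift. Below is a hypothetical standalone helper that only mirrors the conditions and shift amounts from the patch, checked against the constants in the new and-shl.ll test:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

// Hypothetical mirror of the new case in RISCVISelDAGToDAG.cpp:
// (shl (and X, Mask), ShAmt) -> (srli (slli X, C4), C4 - ShAmt),
// where C4 is the number of leading zeros of Mask in an XLen-bit word.
// (The patch additionally requires ShAmt <= 32 and Mask to be a shifted
// mask in the enclosing code; those checks are omitted here.)
bool foldAndShl(uint64_t Mask, unsigned ShAmt, unsigned XLen,
                unsigned &SlliAmt, unsigned &SrliAmt) {
  unsigned LeadingZeros = XLen - std::bit_width(Mask);
  unsigned TrailingZeros = std::countr_zero(Mask);
  // Profitable only if the AND cannot be an ANDI (mask wider than 11 bits)
  // and would itself have been lowered as slli+srli (no trailing zeros).
  // LeadingZeros == 32 is skipped because Zba's slli.uw is preferred there.
  if (TrailingZeros != 0 || LeadingZeros <= ShAmt ||
      XLen - LeadingZeros <= 11 || LeadingZeros == 32)
    return false;
  SlliAmt = LeadingZeros;
  SrliAmt = LeadingZeros - ShAmt;
  return true;
}

int main() {
  unsigned Slli, Srli;
  // Constants from the new llvm/test/CodeGen/RISCV/and-shl.ll test:
  // (shl (and X, 0xFFF), 2) on RV64 becomes slli 52 / srli 50.
  assert(foldAndShl(0xFFF, 2, 64, Slli, Srli) && Slli == 52 && Srli == 50);
  // The same input on RV32 becomes slli 20 / srli 18.
  assert(foldAndShl(0xFFF, 2, 32, Slli, Srli) && Slli == 20 && Srli == 18);
  // A mask that fits an ANDI immediate (0x7FF, 11 bits) is not transformed.
  assert(!foldAndShl(0x7FF, 2, 64, Slli, Srli));
}
```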



https://github.com/llvm/llvm-project/pull/119567
