[llvm] fc95de3 - [RISCV] Require alignment when forming gather with larger element type

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 19 11:00:51 PDT 2023


Author: Philip Reames
Date: 2023-09-19T11:00:42-07:00
New Revision: fc95de38d91b92987b2edeb3be3801485cb0b400

URL: https://github.com/llvm/llvm-project/commit/fc95de38d91b92987b2edeb3be3801485cb0b400
DIFF: https://github.com/llvm/llvm-project/commit/fc95de38d91b92987b2edeb3be3801485cb0b400.diff

LOG: [RISCV] Require alignment when forming gather with larger element type

This fixes a bug in my commit 928564caa5de8b07cede51e45499934777b9938c that was not noticed in review.  I found it while looking at the strided load case (upcoming patch), and realized the previous commit was buggy too.

p.s. Sorry for the slightly confusing test diff.  I'd apparently used the wrong mask for the aligned positive test; it was actually unaligned.  Didn't seem worthy of a separate precommit.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0ab3b591c7b1a6b..0214bd1d7dda326 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13623,10 +13623,11 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
     // TODO: This offset check is too strict if we support fully
     // misaligned memory operations.
     uint64_t C = Index->getConstantOperandVal(i);
-    if (C % ElementSize != 0)
-      return false;
-    if (i % 2 == 0)
+    if (i % 2 == 0) {
+      if (C % WiderElementSize != 0)
+        return false;
       continue;
+    }
     uint64_t Last = Index->getConstantOperandVal(i-1);
     if (C != Last + ElementSize)
       return false;

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 0db7c2ebac3db88..813e16952eca33c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -14040,7 +14040,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 512
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.s.x v9, a1
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
@@ -14049,7 +14048,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64V-LABEL: mgather_gather_2xSEW:
 ; RV64V:       # %bb.0:
 ; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 512
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64V-NEXT:    vmv.s.x v9, a1
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
@@ -14099,7 +14097,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    andi a2, a1, 4
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_3
 ; RV64ZVE32F-NEXT:  .LBB112_11: # %cond.load4
-; RV64ZVE32F-NEXT:    addi a2, a0, 18
+; RV64ZVE32F-NEXT:    addi a2, a0, 16
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
@@ -14107,7 +14105,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    andi a2, a1, 8
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB112_4
 ; RV64ZVE32F-NEXT:  .LBB112_12: # %cond.load7
-; RV64ZVE32F-NEXT:    addi a2, a0, 20
+; RV64ZVE32F-NEXT:    addi a2, a0, 18
 ; RV64ZVE32F-NEXT:    lh a2, 0(a2)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
 ; RV64ZVE32F-NEXT:    vmv.s.x v9, a2
@@ -14147,7 +14145,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
 ; RV64ZVE32F-NEXT:    ret
   %head = insertelement <8 x i1> poison, i1 true, i16 0
   %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
+  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32>  <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
   %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
   ret <8 x i16> %v
 }
@@ -14274,19 +14272,19 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
 define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
 ; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 16513
-; RV32-NEXT:    addi a1, a1, 514
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a1
+; RV32-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vle8.v v9, (a1)
 ; RV32-NEXT:    vluxei8.v v8, (a0), v9
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, 16513
-; RV64V-NEXT:    addiw a1, a1, 514
-; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vmv.s.x v9, a1
+; RV64V-NEXT:    lui a1, %hi(.LCPI114_0)
+; RV64V-NEXT:    addi a1, a1, %lo(.LCPI114_0)
+; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT:    vle8.v v9, (a1)
 ; RV64V-NEXT:    vluxei8.v v8, (a0), v9
 ; RV64V-NEXT:    ret
 ;


        


More information about the llvm-commits mailing list