[llvm] 1c6ea96 - [AArch64] Fix load-insert-zero patterns with i8 and negative offsets.

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 8 04:48:26 PST 2023


Author: David Green
Date: 2023-03-08T12:48:21Z
New Revision: 1c6ea961938488997712763762079e535b8b704e

URL: https://github.com/llvm/llvm-project/commit/1c6ea961938488997712763762079e535b8b704e
DIFF: https://github.com/llvm/llvm-project/commit/1c6ea961938488997712763762079e535b8b704e.diff

LOG: [AArch64] Fix load-insert-zero patterns with i8 and negative offsets.

The i8 patterns should have been using the LDURBi instruction (not LDRBui)
for the unscaled form, where the offset is negative, as reported by the
reproducer in D144086.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/load-insert-zero.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c3fa7df8d2d6b..f31def209dac6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3358,7 +3358,7 @@ multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueT
              (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
 }
 
-defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDRBui,
+defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                               am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
 defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                               am_indexed16, am_unscaled16, uimm12s2, hsub>;

diff  --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
index 1adbe69c76f90..bc21d8b5201a2 100644
--- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll
+++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
@@ -301,6 +301,162 @@ define <2 x double> @loadv2f64_offset(ptr %p) {
 }
 
 
+define <8 x i8> @loadv8i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i8_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur b0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i8, ptr %g
+  %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0
+  ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv16i8_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur b0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i8, ptr %g
+  %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0
+  ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i16, ptr %g
+  %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0
+  ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i16, ptr %g
+  %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0
+  ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i32, ptr %g
+  %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0
+  ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i32, ptr %g
+  %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0
+  ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i64_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur d0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load i64, ptr %g
+  %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0
+  ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load half, ptr %g
+  %v = insertelement <4 x half> zeroinitializer, half %l, i32 0
+  ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8f16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load half, ptr %g
+  %v = insertelement <8 x half> zeroinitializer, half %l, i32 0
+  ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load bfloat, ptr %g
+  %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0
+  ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur h0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load bfloat, ptr %g
+  %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0
+  ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load float, ptr %g
+  %v = insertelement <2 x float> zeroinitializer, float %l, i32 0
+  ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f32_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur s0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load float, ptr %g
+  %v = insertelement <4 x float> zeroinitializer, float %l, i32 0
+  ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f64_noffset:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldur d0, [x0, #-1]
+; CHECK-NEXT:    ret
+  %g = getelementptr inbounds i8, ptr %p, i64 -1
+  %l = load double, ptr %g
+  %v = insertelement <2 x double> zeroinitializer, double %l, i32 0
+  ret <2 x double> %v
+}
+
+
 define void @predictor_4x4_neon(ptr nocapture noundef writeonly %0, i64 noundef %1, ptr nocapture noundef readonly %2, ptr nocapture noundef readnone %3) {
 ; CHECK-LABEL: predictor_4x4_neon:
 ; CHECK:       // %bb.0:


        


More information about the llvm-commits mailing list