[llvm] 1c6ea96 - [AArch64] Fix load-insert-zero patterns with i8 and negative offsets.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 8 04:48:26 PST 2023
Author: David Green
Date: 2023-03-08T12:48:21Z
New Revision: 1c6ea961938488997712763762079e535b8b704e
URL: https://github.com/llvm/llvm-project/commit/1c6ea961938488997712763762079e535b8b704e
DIFF: https://github.com/llvm/llvm-project/commit/1c6ea961938488997712763762079e535b8b704e.diff
LOG: [AArch64] Fix load-insert-zero patterns with i8 and negative offsets.
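The i8 patterns passed LDRBui as both the scaled and the unscaled load
instruction. The unscaled (simm9) patterns should have been using the LDURBi
instruction so that negative offsets are selected correctly, as reported by
the reproducer in D144086.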
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/load-insert-zero.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c3fa7df8d2d6b..f31def209dac6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3358,7 +3358,7 @@ multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueT
(SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}
-defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32, LDRBui, LDRBui,
+defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32, LDRBui, LDURBi,
am_indexed8, am_unscaled8, uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32, LDRHui, LDURHi,
am_indexed16, am_unscaled16, uimm12s2, hsub>;
diff --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
index 1adbe69c76f90..bc21d8b5201a2 100644
--- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll
+++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll
@@ -301,6 +301,162 @@ define <2 x double> @loadv2f64_offset(ptr %p) {
}
+define <8 x i8> @loadv8i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i8_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur b0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i8, ptr %g
+ %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0
+ ret <8 x i8> %v
+}
+
+define <16 x i8> @loadv16i8_noffset(ptr %p) {
+; CHECK-LABEL: loadv16i8_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur b0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i8, ptr %g
+ %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0
+ ret <16 x i8> %v
+}
+
+define <4 x i16> @loadv4i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i16, ptr %g
+ %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0
+ ret <4 x i16> %v
+}
+
+define <8 x i16> @loadv8i16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8i16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i16, ptr %g
+ %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0
+ ret <8 x i16> %v
+}
+
+define <2 x i32> @loadv2i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i32, ptr %g
+ %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0
+ ret <2 x i32> %v
+}
+
+define <4 x i32> @loadv4i32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4i32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i32, ptr %g
+ %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0
+ ret <4 x i32> %v
+}
+
+define <2 x i64> @loadv2i64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2i64_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur d0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load i64, ptr %g
+ %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0
+ ret <2 x i64> %v
+}
+
+
+define <4 x half> @loadv4f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load half, ptr %g
+ %v = insertelement <4 x half> zeroinitializer, half %l, i32 0
+ ret <4 x half> %v
+}
+
+define <8 x half> @loadv8f16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8f16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load half, ptr %g
+ %v = insertelement <8 x half> zeroinitializer, half %l, i32 0
+ ret <8 x half> %v
+}
+
+define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv4bf16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load bfloat, ptr %g
+ %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
+; CHECK-LABEL: loadv8bf16_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur h0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load bfloat, ptr %g
+ %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0
+ ret <8 x bfloat> %v
+}
+
+define <2 x float> @loadv2f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load float, ptr %g
+ %v = insertelement <2 x float> zeroinitializer, float %l, i32 0
+ ret <2 x float> %v
+}
+
+define <4 x float> @loadv4f32_noffset(ptr %p) {
+; CHECK-LABEL: loadv4f32_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur s0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load float, ptr %g
+ %v = insertelement <4 x float> zeroinitializer, float %l, i32 0
+ ret <4 x float> %v
+}
+
+define <2 x double> @loadv2f64_noffset(ptr %p) {
+; CHECK-LABEL: loadv2f64_noffset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldur d0, [x0, #-1]
+; CHECK-NEXT: ret
+ %g = getelementptr inbounds i8, ptr %p, i64 -1
+ %l = load double, ptr %g
+ %v = insertelement <2 x double> zeroinitializer, double %l, i32 0
+ ret <2 x double> %v
+}
+
+
define void @predictor_4x4_neon(ptr nocapture noundef writeonly %0, i64 noundef %1, ptr nocapture noundef readonly %2, ptr nocapture noundef readnone %3) {
; CHECK-LABEL: predictor_4x4_neon:
; CHECK: // %bb.0:
More information about the llvm-commits mailing list