[llvm] 6f91bfc - [LLVM][AArch64ISel] Fix IsLE predicate setting so it does not affect BE codegen. (#135978)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 03:43:14 PDT 2025
Author: Paul Walker
Date: 2025-04-17T11:43:10+01:00
New Revision: 6f91bfcc8aebe61ba4469c48270928f82ee89027
URL: https://github.com/llvm/llvm-project/commit/6f91bfcc8aebe61ba4469c48270928f82ee89027
DIFF: https://github.com/llvm/llvm-project/commit/6f91bfcc8aebe61ba4469c48270928f82ee89027.diff
LOG: [LLVM][AArch64ISel] Fix IsLE predicate setting so it does not affect BE codegen. (#135978)
Ensure little-endian-specific patterns, not just their multiclasses,
are protected by IsLE.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e5d99037b6c63..d13728ec930c8 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2752,7 +2752,7 @@ let Predicates = [HasSVE_or_SME] in {
// For big endian, only BITCASTs involving same sized vector types with same
// size vector elements can be isel'd directly.
- let Predicates = [IsLE] in
+ let Predicates = [HasSVE_or_SME, IsLE] in
foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
if !ne(VT,VT2) then
@@ -3002,24 +3002,25 @@ let Predicates = [HasSVE_or_SME] in {
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
- // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
- // same width as nxv16i8. This saves an add in cases where we would
- // otherwise compute the address separately.
- multiclass unpred_loadstore_bitcast<ValueType Ty> {
- let Predicates = [IsLE] in {
+ let Predicates = [HasSVE_or_SME, IsLE] in {
+ // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
+ // same width as nxv16i8. This saves an add in cases where we would
+ // otherwise compute the address separately.
+ multiclass unpred_loadstore_bitcast<ValueType Ty> {
def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
(LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
(ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
}
+
+ defm : unpred_loadstore_bitcast<nxv8i16>;
+ defm : unpred_loadstore_bitcast<nxv8f16>;
+ defm : unpred_loadstore_bitcast<nxv8bf16>;
+ defm : unpred_loadstore_bitcast<nxv4f32>;
+ defm : unpred_loadstore_bitcast<nxv4i32>;
+ defm : unpred_loadstore_bitcast<nxv2i64>;
+ defm : unpred_loadstore_bitcast<nxv2f64>;
}
- defm : unpred_loadstore_bitcast<nxv8i16>;
- defm : unpred_loadstore_bitcast<nxv8f16>;
- defm : unpred_loadstore_bitcast<nxv8bf16>;
- defm : unpred_loadstore_bitcast<nxv4f32>;
- defm : unpred_loadstore_bitcast<nxv4i32>;
- defm : unpred_loadstore_bitcast<nxv2i64>;
- defm : unpred_loadstore_bitcast<nxv2f64>;
// Allow using LDR/STR to avoid the predicate dependence.
let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses] in
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
index 3f31917b125b7..05abfa319d389 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; LD1B
@@ -15,33 +16,54 @@ define <vscale x 16 x i8> @ld1_nxv16i8(ptr %addr, i64 %off) {
}
define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.h
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: ld1h { z0.h }, p0/z, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 8 x i16>, ptr %ptr
ret <vscale x 8 x i16> %val
}
define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.s
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 4 x i32>, ptr %ptr
ret <vscale x 4 x i32> %val
}
define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.d
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: ld1d { z0.d }, p0/z, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 2 x i64>, ptr %ptr
ret <vscale x 2 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
index d859bbb567ebb..0bf6b12a5d020 100644
--- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; ST1B
@@ -15,33 +16,54 @@ define void @st1_nxv16i8(ptr %addr, i64 %off, <vscale x 16 x i8> %val) {
}
define void @st1_nxv16i8_bitcast_from_i16(ptr %addr, i64 %off, <vscale x 8 x i16> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.h
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: st1h { z0.h }, p0, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 8 x i16> %val, ptr %ptr
ret void
}
define void @st1_nxv16i8_bitcast_from_i32(ptr %addr, i64 %off, <vscale x 4 x i32> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.s
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 4 x i32> %val, ptr %ptr
ret void
}
define void @st1_nxv16i8_bitcast_from_i64(ptr %addr, i64 %off, <vscale x 2 x i64> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT: ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i64:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ptrue p0.b
+; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i64:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.d
+; CHECK-BE-NEXT: add x8, x0, x1
+; CHECK-BE-NEXT: st1d { z0.d }, p0, [x8]
+; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 2 x i64> %val, ptr %ptr
ret void
More information about the llvm-commits mailing list