[llvm] [DAGISel][ARM] Fix vector truncate combine for big-endian (PR #118101)
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 05:51:50 PST 2024
https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/118101
>From 7fb3d04e4c1cccb03f428e01d516466ef95a9e58 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 29 Nov 2024 14:43:30 +0000
Subject: [PATCH 1/5] Add test showing bug
---
llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
create mode 100644 llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
new file mode 100644
index 00000000000000..5de8bf477e53e1
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s
+
+define i32 @test(i64 %arg1) "target-features"="+neon" {
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: bx lr
+entry:
+ %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
+ %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
+ %cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
+ %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
+ %ext = zext i1 %first_cmp to i32
+ ret i32 %ext
+}
>From 700a90545d68d7f4e40ccee5de67b2f7c7c85d9e Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 29 Nov 2024 14:45:33 +0000
Subject: [PATCH 2/5] [DAGISel][ARM] Fix vector truncate combine for big-endian
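This DAG combine was incorrect for big-endian targets because it
assumed that, when a bitcast changes the lane width, the
least-significant bits of each wider lane are in the lower-numbered
lanes of the narrower type, which is only true for little-endian. On
big-endian targets they are in the last (highest-numbered) narrow lane
of each group, so that is the element the combine must select.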
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++++-
.../test/CodeGen/ARM/big-endian-vector-trunc.ll | 17 ++++++++++++++++-
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 521829675ae7c3..90aa3009fb5ef0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15495,12 +15495,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+ unsigned FirstElt =
+ DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0;
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ for (unsigned i = FirstElt, e = BuildVecNumElts; i < e;
+ i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
return DAG.getBuildVector(VT, DL, Opnds);
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
index 5de8bf477e53e1..cdc09754d2654c 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
@@ -4,8 +4,23 @@
define i32 @test(i64 %arg1) "target-features"="+neon" {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: subs r1, r1, #1
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: sbcs r0, r0, #0
+; CHECK-NEXT: vldr s0, .LCPI0_0
+; CHECK-NEXT: movwhs r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vmov s1, r2
+; CHECK-NEXT: vmovn.i32 d16, q0
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vmov.u8 r0, d16[0]
+; CHECK-NEXT: and r0, r0, #1
; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 0xffffffff @ float NaN
entry:
%insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
%splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
>From 804db3d27fbdbb45241c6e8179ecc97ab62d6cf1 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 29 Nov 2024 15:42:39 +0000
Subject: [PATCH 3/5] Remove unneeded target-features attribute
---
llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
index cdc09754d2654c..15a4f2e37ca469 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s
-define i32 @test(i64 %arg1) "target-features"="+neon" {
+define i32 @test(i64 %arg1) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: subs r1, r1, #1
>From fc2f59abee13242be5a1970298d101394a8a54fd Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Tue, 3 Dec 2024 09:26:35 +0000
Subject: [PATCH 4/5] Also test little-endian
---
.../CodeGen/ARM/big-endian-vector-trunc.ll | 31 -----------
llvm/test/CodeGen/ARM/vector-trunc.ll | 52 +++++++++++++++++++
2 files changed, 52 insertions(+), 31 deletions(-)
delete mode 100644 llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
create mode 100644 llvm/test/CodeGen/ARM/vector-trunc.ll
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll b/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
deleted file mode 100644
index 15a4f2e37ca469..00000000000000
--- a/llvm/test/CodeGen/ARM/big-endian-vector-trunc.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s
-
-define i32 @test(i64 %arg1) {
-; CHECK-LABEL: test:
-; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: subs r1, r1, #1
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: sbcs r0, r0, #0
-; CHECK-NEXT: vldr s0, .LCPI0_0
-; CHECK-NEXT: movwhs r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: mvnne r2, #0
-; CHECK-NEXT: vmov s1, r2
-; CHECK-NEXT: vmovn.i32 d16, q0
-; CHECK-NEXT: vmovn.i16 d16, q8
-; CHECK-NEXT: vmov.u8 r0, d16[0]
-; CHECK-NEXT: and r0, r0, #1
-; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: @ %bb.1:
-; CHECK-NEXT: .LCPI0_0:
-; CHECK-NEXT: .long 0xffffffff @ float NaN
-entry:
- %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
- %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
- %cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
- %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
- %ext = zext i1 %first_cmp to i32
- ret i32 %ext
-}
diff --git a/llvm/test/CodeGen/ARM/vector-trunc.ll b/llvm/test/CodeGen/ARM/vector-trunc.ll
new file mode 100644
index 00000000000000..9acf463c2be939
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-trunc.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=LE
+; RUN: llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon < %s | FileCheck %s --check-prefix=BE
+
+define i32 @test(i64 %arg1) {
+; LE-LABEL: test:
+; LE: @ %bb.0: @ %entry
+; LE-NEXT: subs r0, r0, #1
+; LE-NEXT: mov r2, #0
+; LE-NEXT: sbcs r0, r1, #0
+; LE-NEXT: vldr s0, .LCPI0_0
+; LE-NEXT: movwhs r2, #1
+; LE-NEXT: cmp r2, #0
+; LE-NEXT: mvnne r2, #0
+; LE-NEXT: vmov s1, r2
+; LE-NEXT: vmovn.i32 d16, q0
+; LE-NEXT: vmovn.i16 d16, q8
+; LE-NEXT: vmov.u8 r0, d16[0]
+; LE-NEXT: and r0, r0, #1
+; LE-NEXT: bx lr
+; LE-NEXT: .p2align 2
+; LE-NEXT: @ %bb.1:
+; LE-NEXT: .LCPI0_0:
+; LE-NEXT: .long 0xffffffff @ float NaN
+;
+; BE-LABEL: test:
+; BE: @ %bb.0: @ %entry
+; BE-NEXT: subs r1, r1, #1
+; BE-NEXT: mov r2, #0
+; BE-NEXT: sbcs r0, r0, #0
+; BE-NEXT: vldr s0, .LCPI0_0
+; BE-NEXT: movwhs r2, #1
+; BE-NEXT: cmp r2, #0
+; BE-NEXT: mvnne r2, #0
+; BE-NEXT: vmov s1, r2
+; BE-NEXT: vmovn.i32 d16, q0
+; BE-NEXT: vmovn.i16 d16, q8
+; BE-NEXT: vmov.u8 r0, d16[0]
+; BE-NEXT: and r0, r0, #1
+; BE-NEXT: bx lr
+; BE-NEXT: .p2align 2
+; BE-NEXT: @ %bb.1:
+; BE-NEXT: .LCPI0_0:
+; BE-NEXT: .long 0xffffffff @ float NaN
+entry:
+ %insert_zero = insertelement <8 x i64> poison, i64 %arg1, i64 0
+ %splat_zero = shufflevector <8 x i64> %insert_zero, <8 x i64> poison, <8 x i32> zeroinitializer
+ %cmp_vec = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %splat_zero
+ %first_cmp = extractelement <8 x i1> %cmp_vec, i32 0
+ %ext = zext i1 %first_cmp to i32
+ ret i32 %ext
+}
>From 3e7564bf197a62de51d73de4a0814e2e3be418d9 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 4 Dec 2024 13:50:48 +0000
Subject: [PATCH 5/5] Use existing isLE variable
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 37be089096ebb5..4435d5ff50c728 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15495,8 +15495,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
unsigned BuildVecNumElts = BuildVect.getNumOperands();
unsigned TruncVecNumElts = VT.getVectorNumElements();
unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
- unsigned FirstElt =
- DAG.getDataLayout().isBigEndian() ? (TruncEltOffset - 1) : 0;
+ unsigned FirstElt = isLE ? 0 : (TruncEltOffset - 1);
assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
"Invalid number of elements");
More information about the llvm-commits mailing list