[llvm] [DAG] Fold vecreduce.or(sext(x)) to sext(vecreduce.or(x)) (PR #108959)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 04:04:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
The same is true for and / xor reductions, where the sext / zext can be sunk down through the bitwise operation.
https://alive2.llvm.org/ce/z/TvzCd5
---
Patch is 54.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108959.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+13)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-bitext.ll (+312-547)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd39cb08f8de92..37fcd09d4f5626 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27010,6 +27010,19 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
}
+ // vecreduce_or(sext(x)) -> sext(vecreduce_or(x))
+ // Same for zext and anyext, and for and/or/xor reductions.
+ if ((Opcode == ISD::VECREDUCE_OR || Opcode == ISD::VECREDUCE_AND ||
+ Opcode == ISD::VECREDUCE_XOR) &&
+ (N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) &&
+ TLI.isOperationLegalOrCustom(Opcode, N0.getOperand(0).getValueType())) {
+ SDValue Red = DAG.getNode(Opcode, SDLoc(N),
+ N0.getOperand(0).getValueType().getScalarType(),
+ N0.getOperand(0));
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), N->getValueType(0), Red);
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll b/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll
index 1b1b7e676bb3cc..cc65f17b718649 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bitext.ll
@@ -4,13 +4,13 @@
define zeroext i16 @and_sext_v8i8_i16(<8 x i8> %x) {
; CHECK-LABEL: and_sext_v8i8_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
; CHECK-NEXT: lsr x9, x8, #16
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb w8, w8
+; CHECK-NEXT: and w0, w8, #0xffff
; CHECK-NEXT: ret
entry:
%y = sext <8 x i8> %x to <8 x i16>
@@ -21,13 +21,12 @@ entry:
define zeroext i16 @and_zext_v8i8_i16(<8 x i8> %x) {
; CHECK-LABEL: and_zext_v8i8_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
; CHECK-NEXT: lsr x9, x8, #16
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <8 x i8> %x to <8 x i16>
@@ -40,13 +39,13 @@ define zeroext i16 @and_sext_v16i8_i16(<16 x i8> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
; CHECK-NEXT: lsr x9, x8, #16
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb w8, w8
+; CHECK-NEXT: and w0, w8, #0xffff
; CHECK-NEXT: ret
entry:
%y = sext <16 x i8> %x to <16 x i16>
@@ -59,13 +58,12 @@ define zeroext i16 @and_zext_v16i8_i16(<16 x i8> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: and x8, x8, x8, lsr #32
; CHECK-NEXT: lsr x9, x8, #16
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <16 x i8> %x to <16 x i16>
@@ -76,15 +74,12 @@ entry:
define i32 @and_sext_v8i8_i32(<8 x i8> %x) {
; CHECK-LABEL: and_sext_v8i8_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb w0, w8
; CHECK-NEXT: ret
entry:
%y = sext <8 x i8> %x to <8 x i32>
@@ -95,15 +90,12 @@ entry:
define i32 @and_zext_v8i8_i32(<8 x i8> %x) {
; CHECK-LABEL: and_zext_v8i8_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <8 x i8> %x to <8 x i32>
@@ -114,19 +106,14 @@ entry:
define i32 @and_sext_v16i8_i32(<16 x i8> %x) {
; CHECK-LABEL: and_sext_v16i8_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb w0, w8
; CHECK-NEXT: ret
entry:
%y = sext <16 x i8> %x to <16 x i32>
@@ -137,19 +124,14 @@ entry:
define i32 @and_zext_v16i8_i32(<16 x i8> %x) {
; CHECK-LABEL: and_zext_v16i8_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
-; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <16 x i8> %x to <16 x i32>
@@ -160,18 +142,12 @@ entry:
define i64 @and_sext_v8i8_i64(<8 x i8> %x) {
; CHECK-LABEL: and_sext_v8i8_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <8 x i8> %x to <8 x i64>
@@ -182,18 +158,12 @@ entry:
define i64 @and_zext_v8i8_i64(<8 x i8> %x) {
; CHECK-LABEL: and_zext_v8i8_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v2.2d, v1.4s, #0
-; CHECK-NEXT: ushll2 v3.2d, v0.4s, #0
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and x0, x8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <8 x i8> %x to <8 x i64>
@@ -204,27 +174,14 @@ entry:
define i64 @and_sext_v16i8_i64(<16 x i8> %x) {
; CHECK-LABEL: and_sext_v16i8_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v5.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: and v3.8b, v5.8b, v4.8b
-; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: and v2.8b, v7.8b, v6.8b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <16 x i8> %x to <16 x i64>
@@ -235,28 +192,14 @@ entry:
define i64 @and_zext_v16i8_i64(<16 x i8> %x) {
; CHECK-LABEL: and_zext_v16i8_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-NEXT: and v5.8b, v3.8b, v2.8b
-; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ext v3.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: and v4.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
-; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-NEXT: and v4.8b, v5.8b, v4.8b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: ushll v2.2d, v4.2s, #0
-; CHECK-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #8
+; CHECK-NEXT: and x0, x8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <16 x i8> %x to <16 x i64>
@@ -267,12 +210,11 @@ entry:
define i32 @and_sext_v4i16_i32(<4 x i16> %x) {
; CHECK-LABEL: and_sext_v4i16_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #16
+; CHECK-NEXT: sxth w0, w8
; CHECK-NEXT: ret
entry:
%y = sext <4 x i16> %x to <4 x i32>
@@ -283,11 +225,9 @@ entry:
define i32 @and_zext_v4i16_i32(<4 x i16> %x) {
; CHECK-LABEL: and_zext_v4i16_i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
@@ -301,12 +241,11 @@ define i32 @and_sext_v8i16_i32(<8 x i16> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: lsr x9, x8, #32
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #16
+; CHECK-NEXT: sxth w0, w8
; CHECK-NEXT: ret
entry:
%y = sext <8 x i16> %x to <8 x i32>
@@ -319,11 +258,9 @@ define i32 @and_zext_v8i16_i32(<8 x i16> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
@@ -335,13 +272,11 @@ entry:
define i64 @and_sext_v4i16_i64(<4 x i16> %x) {
; CHECK-LABEL: and_sext_v4i16_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #16
+; CHECK-NEXT: sxth x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <4 x i16> %x to <4 x i64>
@@ -352,13 +287,10 @@ entry:
define i64 @and_zext_v4i16_i64(<4 x i16> %x) {
; CHECK-LABEL: and_zext_v4i16_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%y = zext <4 x i16> %x to <4 x i64>
@@ -369,17 +301,13 @@ entry:
define i64 @and_sext_v8i16_i64(<8 x i16> %x) {
; CHECK-LABEL: and_sext_v8i16_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v3.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w8, w8, w8, lsr #16
+; CHECK-NEXT: sxth x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <8 x i16> %x to <8 x i64>
@@ -390,17 +318,12 @@ entry:
define i64 @and_zext_v8i16_i64(<8 x i16> %x) {
; CHECK-LABEL: and_zext_v8i16_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v2.2d, v1.4s, #0
-; CHECK-NEXT: ushll2 v3.2d, v0.4s, #0
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: and x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%y = zext <8 x i16> %x to <8 x i64>
@@ -411,10 +334,10 @@ entry:
define i64 @and_sext_v2i32_i64(<2 x i32> %x) {
; CHECK-LABEL: and_sext_v2i32_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: sxtw x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <2 x i32> %x to <2 x i64>
@@ -425,10 +348,9 @@ entry:
define i64 @and_zext_v2i32_i64(<2 x i32> %x) {
; CHECK-LABEL: and_zext_v2i32_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%y = zext <2 x i32> %x to <2 x i64>
@@ -441,10 +363,10 @@ define i64 @and_sext_v4i32_i64(<4 x i32> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: sxtw x0, w8
; CHECK-NEXT: ret
entry:
%y = sext <4 x i32> %x to <4 x i64>
@@ -457,10 +379,9 @@ define i64 @and_zext_v4i32_i64(<4 x i32> %x) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%y = zext <4 x i32> %x to <4 x i64>
@@ -471,13 +392,12 @@ entry:
define zeroext i16 @or_sext_v8i8_i16(<8 x i8> %x) {
; CHECK-LABEL: or_sext_v8i8_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: orr x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w8, w8, w8, lsr #16
+; CHECK-NEXT: orr w8, w8, w8, lsr #8
+; CHECK-NEXT: sxtb w8, w8
; CHECK-NEXT: and w0, w8, #0xffff
; CHECK-NEXT: ret
entry:
@@ -489,14 +409,12 @@ entry:
define zeroext i16 @or_zext_v8i8_i16(<8 x i8> %x) {
; CHECK-LABEL: or_zext_v8i8_i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: lsr x9, x8, #32
+; CHECK-NEXT: orr x8, x8, x8, lsr #32
+; CHECK-NEXT: lsr x9, x8, #16
; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w8, w8, w8, lsr #16
-; CHECK-NEXT: and w0, w8, #0xffff
+; CHECK-NEXT: orr w8, w8, w8, lsr #8
+; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
entry:
%y = zext <8 x i8> %x to <8 x i16>
@@ -509,13 +427,12 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108959
More information about the llvm-commits mailing list