[llvm] e60eb70 - recommit "[DAGCombiner] Teach scalarizeBinOpOfSplats handle scalable splat."
Thu Jul 21 02:39:00 PDT 2022
Author: jacquesguan
Date: 2022-07-21T17:34:34+08:00
New Revision: e60eb7053dc91a23cc3b96652aeda7513dd4b082
URL: https://github.com/llvm/llvm-project/commit/e60eb7053dc91a23cc3b96652aeda7513dd4b082
DIFF: https://github.com/llvm/llvm-project/commit/e60eb7053dc91a23cc3b96652aeda7513dd4b082.diff
LOG: recommit "[DAGCombiner] Teach scalarizeBinOpOfSplats handle scalable splat."
With fixes for the AArch64 and Hexagon test cases.
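For reference, the combine rewrites a binary op of two splats into a single splat of the scalar op. A minimal LLVM IR sketch of the scalable pattern this recommit enables (mirroring the rvv/*-sdnode.ll tests below; the function name is illustrative):

  define <vscale x 8 x i64> @splat_add_xx(i64 %a, i64 %b) {
    ; Splat %a and %b, then add the two splat vectors.
    %head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
    %splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
    %head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
    %splat2 = shufflevector <vscale x 8 x i64> %head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
    ; With this patch the add is performed once on scalars and splatted,
    ; instead of splatting both operands and adding whole vectors.
    %v = add <vscale x 8 x i64> %splat1, %splat2
    ret <vscale x 8 x i64> %v
  }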
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/active_lane_mask.ll
llvm/test/CodeGen/AArch64/fdiv-combine.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
llvm/test/CodeGen/AArch64/sve-gep.ll
llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4883b4b444778..06c633e45ccd6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23471,10 +23471,14 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ // Extract element from splat_vector should be free.
+ // TODO: use DAG.isSplatValue instead?
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
+ N1.getOpcode() == ISD::SPLAT_VECTOR;
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
- !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
@@ -23496,6 +23500,8 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
}
// bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ if (VT.isScalableVector())
+ return DAG.getSplatVector(VT, DL, ScalarBO);
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
return DAG.getBuildVector(VT, DL, Ops);
}
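In DAG terms the change implements the rewrite stated in the comment above, conceptually (a sketch, not actual -debug output):

  add (splat_vector X), (splat_vector Y)
    --> splat_vector (add X, Y)

For fixed-width vectors the splatted result is still materialized as a BUILD_VECTOR, which needs a compile-time element count (VT.getVectorNumElements()); scalable vectors have no such count, so the new early return emits a SPLAT_VECTOR via DAG.getSplatVector instead. Extracting the scalar operand of a SPLAT_VECTOR is treated as free, which is why the isExtractVecEltCheap check is bypassed when both operands are SPLAT_VECTOR nodes.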
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 4ec31ee82e8ce..1ddd24e6dd386 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -111,16 +111,16 @@ define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: mov z1.s, w0
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: and z1.s, z1.s, #0xff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w1
; CHECK-NEXT: umin z0.s, z0.s, #255
-; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: and z0.s, z0.s, #0xff
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
@@ -131,18 +131,18 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xff
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x9, x1, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: and z1.d, z1.d, #0xff
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: add z0.d, z0.d, z1.d
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: mov z2.d, x1
+; CHECK-NEXT: mov z1.d, x9
; CHECK-NEXT: umin z0.d, z0.d, #255
-; CHECK-NEXT: and z2.d, z2.d, #0xff
; CHECK-NEXT: and z0.d, z0.d, #0xff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
; CHECK-NEXT: ret
%active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
ret <vscale x 2 x i1> %active.lane.mask
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index e4786bc5d48e0..57fef74e50275 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -136,11 +136,9 @@ entry:
define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z2.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s2, #1.00000000
+; CHECK-NEXT: fdiv s0, s2, s0
; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: ret
entry:
@@ -153,11 +151,9 @@ entry:
define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
; CHECK-LABEL: splat_three_fdiv_nxv4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT: fmov z4.s, #1.00000000
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: fdiv z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s0, s4, s0
+; CHECK-NEXT: mov z4.s, s0
; CHECK-NEXT: fmul z0.s, z1.s, z4.s
; CHECK-NEXT: fmul z1.s, z2.s, z4.s
; CHECK-NEXT: fmul z2.s, z3.s, z4.s
@@ -190,11 +186,9 @@ entry:
define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
; CHECK-LABEL: splat_two_fdiv_nxv2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov z3.d, #1.00000000
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z0.d, d0
-; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT: fmov d3, #1.00000000
+; CHECK-NEXT: fdiv d0, d3, d0
+; CHECK-NEXT: mov z3.d, d0
; CHECK-NEXT: fmul z0.d, z1.d, z3.d
; CHECK-NEXT: fmul z1.d, z2.d, z3.d
; CHECK-NEXT: b foo_2_nxv2f64
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index 6d2d4227bfd36..14cc314b67dab 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -169,7 +169,6 @@ define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) vscale_ran
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -185,15 +184,14 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
@@ -209,7 +207,6 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
@@ -230,7 +227,6 @@ define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) vscale_
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -251,7 +247,6 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) vscale_
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -298,7 +293,6 @@ define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) vscale_r
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -314,15 +308,14 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
@@ -338,7 +331,6 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
@@ -359,7 +351,6 @@ define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) vscal
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -380,7 +371,6 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) vscal
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index 8b76c00631bb6..0194c9ff3832b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -298,7 +298,6 @@ define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) vscale_range(2
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -314,15 +313,14 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.s, w9
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s
@@ -338,7 +336,6 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: mov z2.s, w8
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
@@ -359,7 +356,6 @@ define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) vscale_rang
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -380,7 +376,6 @@ define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) vscale_rang
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -427,7 +422,6 @@ define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) vscale_range(2
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -443,15 +437,14 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.d, x9
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
@@ -467,7 +460,6 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
@@ -488,7 +480,6 @@ define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) vscale_rang
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -509,7 +500,6 @@ define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, i1 %mask) vscale_rang
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
index d2445aff408be..209ebd33dd82b 100644
--- a/llvm/test/CodeGen/AArch64/sve-gep.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -56,8 +56,8 @@ define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*>
define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
; CHECK-LABEL: scalable_of_fixed_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: add x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
@@ -202,8 +202,8 @@ define <vscale x 2 x i64*> @scalable_of_fixed_5_i64(i64* %base, <vscale x 2 x i3
define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
; CHECK-LABEL: scalable_of_scalable_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, x0
-; CHECK-NEXT: incd z0.d, all, mul #8
+; CHECK-NEXT: addvl x8, x0, #1
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
index 4645ebce5de6b..bcab26101ccb6 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-multiple.ll
@@ -2,7 +2,7 @@
; This code generates a concat_vectors with more than 2 inputs. Make sure
; that this compiles successfully.
-; CHECK: vlsr
+; CHECK: lsr
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index ec5267694a8ed..12bacea881971 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -879,9 +879,9 @@ define <vscale x 8 x i64> @vadd_xx_nxv8i64(i64 %a, i64 %b) nounwind {
;
; RV64-LABEL: vadd_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
index 52b1e69988297..5c6d97d4b5e22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1370,9 +1370,9 @@ define <vscale x 8 x i64> @vand_xx_nxv8i64(i64 %a, i64 %b) nounwind {
;
; RV64-LABEL: vand_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index bcbf8bafc7374..2682bf8792f36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vmul_vv_nxv1i8:
@@ -939,12 +939,19 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
-; RV64-LABEL: vmul_xx_nxv8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmul.vx v8, v8, a1
-; RV64-NEXT: ret
+; RV64NOM-LABEL: vmul_xx_nxv8i64:
+; RV64NOM: # %bb.0:
+; RV64NOM-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64NOM-NEXT: vmv.v.x v8, a0
+; RV64NOM-NEXT: vmul.vx v8, v8, a1
+; RV64NOM-NEXT: ret
+;
+; RV64M-LABEL: vmul_xx_nxv8i64:
+; RV64M: # %bb.0:
+; RV64M-NEXT: mul a0, a0, a1
+; RV64M-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; RV64M-NEXT: vmv.v.x v8, a0
+; RV64M-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
%head2 = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index 57b558a77528c..b932e1774044e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1163,9 +1163,9 @@ define <vscale x 8 x i64> @vor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
;
; RV64-LABEL: vor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
index 450605560ec96..01e43876d09b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -857,9 +857,9 @@ define <vscale x 8 x i64> @vsub_xx_nxv8i64(i64 %a, i64 %b) nounwind {
;
; RV64-LABEL: vsub_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: sub a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vsub.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
index e8214569bcb40..c13b734a2c83e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1370,9 +1370,9 @@ define <vscale x 8 x i64> @vxor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
;
; RV64-LABEL: vxor_xx_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vxor.vx v8, v8, a1
; RV64-NEXT: ret
%head1 = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
%splat1 = shufflevector <vscale x 8 x i64> %head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
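To reproduce the improved RV64 lowering, the IR sketch from the top of this message can be run through llc with the same flags as the RUN lines in these tests (splat-add.ll is a hypothetical file holding that IR):

  $ llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs splat-add.ll -o -

Per the updated CHECK lines in vadd-sdnode.ll, the body now does the addition on the scalar side before a single splat:

  add a0, a0, a1
  vsetvli a1, zero, e64, m8, ta, mu
  vmv.v.x v8, a0
  ret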