[llvm] 275658d - [SelectionDAG] Implicitly truncate known bits in SPLAT_VECTOR
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 6 07:44:00 PST 2023
Author: Luke Lau
Date: 2023-01-06T15:43:47Z
New Revision: 275658d1af11a2fbbbbad3d70629afc75b9e107c
URL: https://github.com/llvm/llvm-project/commit/275658d1af11a2fbbbbad3d70629afc75b9e107c
DIFF: https://github.com/llvm/llvm-project/commit/275658d1af11a2fbbbbad3d70629afc75b9e107c.diff
LOG: [SelectionDAG] Implicitly truncate known bits in SPLAT_VECTOR
Now that D139525 fixes the Hexagon infinite loop, the stopgap can be
removed. This provides more information about known bits in SPLAT_VECTORs
whose scalar operand is wider than the vector element bit width (which is
most of the time).
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D141075
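
As a rough illustration of the effect (a minimal sketch against the public
KnownBits API; the widths and the helper name are mine, not part of the
patch):

    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Mirrors the AArch64 test changes below: the splatted i32 operand comes
    // from `and w9, w2, #0x1`, so its upper 31 bits are known zero.  Splatting
    // it into 16-bit elements now carries that knowledge through the implicit
    // truncation instead of giving up.
    KnownBits exampleSplatKnownBits() {
      KnownBits Src(32);
      Src.Zero.setHighBits(31); // operand is known to be 0 or 1
      return Src.trunc(16);     // per-element (i16) known bits: bits 15..1 zero
    }

Before this patch, computeKnownBits bailed out whenever the splat operand was
wider than the element type; with the truncation in place, the redundant
masking instructions in the AArch64 and WebAssembly tests below fall away.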
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/active_lane_mask.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
llvm/test/CodeGen/WebAssembly/pr59626.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e1e95f7004fd..3805f70df07a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2977,17 +2977,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Depth + 1);
case ISD::SPLAT_VECTOR: {
SDValue SrcOp = Op.getOperand(0);
- if (SrcOp.getValueSizeInBits() != BitWidth) {
- assert(SrcOp.getValueSizeInBits() > BitWidth &&
- "Expected SPLAT_VECTOR implicit truncation");
- // FIXME: We should be able to truncate the known bits here to match
- // the official semantics of SPLAT_VECTOR, but doing so exposes a
- // Hexagon target bug which results in an infinite loop during
- // DAGCombine. (See D137140 for repo). Once that's fixed, we can
- // strengthen this.
- break;
- }
- Known = computeKnownBits(SrcOp, Depth + 1);
+ assert(SrcOp.getValueSizeInBits() >= BitWidth &&
+ "Expected SPLAT_VECTOR implicit truncation");
+ // Implicitly truncate the bits to match the official semantics of
+ // SPLAT_VECTOR.
+ Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth);
break;
}
case ISD::BUILD_VECTOR:
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index cb2b498f274b..09cb0aa28b10 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -100,7 +100,6 @@ define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
; CHECK-NEXT: mov z1.h, w1
; CHECK-NEXT: umin z0.h, z0.h, #255
; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index 95c312db049e..553acc793098 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -40,7 +40,6 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -56,15 +55,14 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.h, w9
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
@@ -80,7 +78,6 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: mov z2.h, w8
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
@@ -101,7 +98,6 @@ define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -122,7 +118,6 @@ define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index e10a3531f8a0..19d7c4212842 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -40,7 +40,6 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -56,15 +55,14 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov w8, #32
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.b
; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.b, w9
-; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1
; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0
; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b
; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b
@@ -80,7 +78,6 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.b
; VBITS_GE_512-NEXT: mov z2.b, w8
-; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1
; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
@@ -101,7 +98,6 @@ define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -122,7 +118,6 @@ define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: and z2.b, z2.b, #0x1
; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
@@ -169,7 +164,6 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -185,15 +179,14 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: mov x8, #16
-; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: and w9, w2, #0x1
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: mov z4.h, w9
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0
; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h
@@ -209,7 +202,6 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: mov z2.h, w8
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
@@ -230,7 +222,6 @@ define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
@@ -251,7 +242,6 @@ define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
index 8b2455bff61e..8ef7b8032cc0 100644
--- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -49,15 +49,10 @@ define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: lsr z1.h, z2.h, #8
-; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsr z1.h, z0.h, #8
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
%b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
diff --git a/llvm/test/CodeGen/WebAssembly/pr59626.ll b/llvm/test/CodeGen/WebAssembly/pr59626.ll
index 33b85fb2de09..a2324b0b48a7 100644
--- a/llvm/test/CodeGen/WebAssembly/pr59626.ll
+++ b/llvm/test/CodeGen/WebAssembly/pr59626.ll
@@ -5,79 +5,38 @@
define i8 @f(ptr %0, ptr %1) {
; CHECK-32-LABEL: f:
; CHECK-32: .functype f (i32, i32) -> (i32)
-; CHECK-32-NEXT: .local v128
; CHECK-32-NEXT: # %bb.0: # %BB
; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store8 2
; CHECK-32-NEXT: local.get 0
-; CHECK-32-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-32-NEXT: v128.store16_lane 0, 0
-; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 5
-; CHECK-32-NEXT: v128.const 0, 0
-; CHECK-32-NEXT: i32x4.extract_lane 0
-; CHECK-32-NEXT: i8x16.splat
-; CHECK-32-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-; CHECK-32-NEXT: v128.and
-; CHECK-32-NEXT: local.tee 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 2
-; CHECK-32-NEXT: i32.div_u
-; CHECK-32-NEXT: i32.store8 2
+; CHECK-32-NEXT: i32.const 0
+; CHECK-32-NEXT: i32.store16 0
; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 1
-; CHECK-32-NEXT: local.get 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 0
-; CHECK-32-NEXT: local.tee 0
-; CHECK-32-NEXT: i32.const 1
-; CHECK-32-NEXT: i32.and
-; CHECK-32-NEXT: i32.div_u
+; CHECK-32-NEXT: local.get 0
; CHECK-32-NEXT: i8x16.splat
-; CHECK-32-NEXT: i32.const 3
-; CHECK-32-NEXT: local.get 2
-; CHECK-32-NEXT: i8x16.extract_lane_u 1
-; CHECK-32-NEXT: i32.div_u
-; CHECK-32-NEXT: i8x16.replace_lane 1
; CHECK-32-NEXT: v128.store16_lane 0, 0
-; CHECK-32-NEXT: local.get 0
+; CHECK-32-NEXT: v128.const 0, 0
+; CHECK-32-NEXT: i32x4.extract_lane 0
; CHECK-32-NEXT: # fallthrough-return
;
; CHECK-64-LABEL: f:
; CHECK-64: .functype f (i64, i64) -> (i32)
-; CHECK-64-NEXT: .local v128, i32
+; CHECK-64-NEXT: .local i32
; CHECK-64-NEXT: # %bb.0: # %BB
; CHECK-64-NEXT: local.get 0
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store8 2
; CHECK-64-NEXT: local.get 0
-; CHECK-64-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-; CHECK-64-NEXT: v128.store16_lane 0, 0
-; CHECK-64-NEXT: drop
-; CHECK-64-NEXT: local.get 1
-; CHECK-64-NEXT: i32.const 5
-; CHECK-64-NEXT: v128.const 0, 0
-; CHECK-64-NEXT: i32x4.extract_lane 0
-; CHECK-64-NEXT: i8x16.splat
-; CHECK-64-NEXT: v128.const 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-; CHECK-64-NEXT: v128.and
-; CHECK-64-NEXT: local.tee 2
-; CHECK-64-NEXT: i8x16.extract_lane_u 2
-; CHECK-64-NEXT: i32.const 1
-; CHECK-64-NEXT: i32.and
-; CHECK-64-NEXT: i32.div_u
-; CHECK-64-NEXT: i32.store8 2
+; CHECK-64-NEXT: i32.const 0
+; CHECK-64-NEXT: i32.store16 0
; CHECK-64-NEXT: local.get 1
-; CHECK-64-NEXT: i32.const 1
; CHECK-64-NEXT: local.get 2
-; CHECK-64-NEXT: i8x16.extract_lane_u 0
-; CHECK-64-NEXT: local.tee 3
-; CHECK-64-NEXT: i32.const 1
-; CHECK-64-NEXT: i32.and
-; CHECK-64-NEXT: i32.div_u
; CHECK-64-NEXT: i8x16.splat
; CHECK-64-NEXT: v128.store16_lane 0, 0
; CHECK-64-NEXT: drop
-; CHECK-64-NEXT: local.get 3
+; CHECK-64-NEXT: v128.const 0, 0
+; CHECK-64-NEXT: i32x4.extract_lane 0
; CHECK-64-NEXT: # fallthrough-return
BB:
store <3 x i8> zeroinitializer, ptr %0