[llvm] c5a21c1 - [PhaseOrdering][X86] Add test coverage based off #111431
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 09:31:20 PST 2024
Author: Simon Pilgrim
Date: 2024-12-10T17:31:08Z
New Revision: c5a21c115856298fcc04f343f573965da7b14af4
URL: https://github.com/llvm/llvm-project/commit/c5a21c115856298fcc04f343f573965da7b14af4
DIFF: https://github.com/llvm/llvm-project/commit/c5a21c115856298fcc04f343f573965da7b14af4.diff
LOG: [PhaseOrdering][X86] Add test coverage based off #111431
Add tests for the concatenation of boolean vectors bitcast to integers - similar to the MOVMSK pattern.
Added:
llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll b/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
new file mode 100644
index 00000000000000..07bfbffa9518fa
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+; Concatenate two MOVMSK-style masks into an i32: each icmp-slt-zero collects
+; the per-element sign bits of a <16 x i8> into a <16 x i1>, bitcast packs that
+; into an i16 mask, and %v0's mask is shifted into the high 16 bits while
+; %v1's mask fills the low 16 bits (or disjoint - the halves never overlap).
+define i32 @movmsk_i32_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i32_v32i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i32 [[Z0]], 16
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %z0 = zext i16 %b0 to i32
+ %z1 = zext i16 %b1 to i32
+ %s0 = shl nuw i32 %z0, 16
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; Same concat pattern with <4 x i32> inputs: each sign-bit mask is only an i4,
+; so the two 4-bit masks occupy bits [7:4] (%v0) and [3:0] (%v1) of the i32.
+define i32 @movmsk_i32_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i32_v8i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 4
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %z0 = zext i4 %b0 to i32
+ %z1 = zext i4 %b1 to i32
+ %s0 = shl nuw i32 %z0, 4
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; As movmsk_i32_v32i8_v16i8 but widened to an i64 result: the two i16
+; sign-bit masks land in bits [31:16] (%v0) and [15:0] (%v1); the upper 32
+; bits of the result are always zero.
+define i64 @movmsk_i64_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i64_v32i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 16
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %z0 = zext i16 %b0 to i64
+ %z1 = zext i16 %b1 to i64
+ %s0 = shl nuw i64 %z0, 16
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; As movmsk_i32_v8i32_v4i32 but with an i64 result: two i4 sign-bit masks
+; concatenated into bits [7:4] (%v0) and [3:0] (%v1).
+define i64 @movmsk_i64_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i64_v8i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 4
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %z0 = zext i4 %b0 to i64
+ %z1 = zext i4 %b1 to i64
+ %s0 = shl nuw i64 %z0, 4
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; Four-way concat: four i16 sign-bit masks from <16 x i8> inputs fill the full
+; i64, with %v0 in bits [63:48] down to %v3 in bits [15:0]. The OR tree is
+; reassociated relative to the two-input tests to cover a different shape.
+define i64 @movmsk_i64_v64i8_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
+; CHECK-LABEL: @movmsk_i64_v64i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[C2:%.*]] = icmp slt <16 x i8> [[V2:%.*]], zeroinitializer
+; CHECK-NEXT: [[C3:%.*]] = icmp slt <16 x i8> [[V3:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[B2:%.*]] = bitcast <16 x i1> [[C2]] to i16
+; CHECK-NEXT: [[B3:%.*]] = bitcast <16 x i1> [[C3]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
+; CHECK-NEXT: [[Z2:%.*]] = zext i16 [[B2]] to i64
+; CHECK-NEXT: [[Z3:%.*]] = zext i16 [[B3]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 48
+; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 32
+; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16
+; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
+; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %c2 = icmp slt <16 x i8> %v2, zeroinitializer
+ %c3 = icmp slt <16 x i8> %v3, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %b2 = bitcast <16 x i1> %c2 to i16
+ %b3 = bitcast <16 x i1> %c3 to i16
+ %z0 = zext i16 %b0 to i64
+ %z1 = zext i16 %b1 to i64
+ %z2 = zext i16 %b2 to i64
+ %z3 = zext i16 %b3 to i64
+ %s0 = shl nuw i64 %z0, 48
+ %s1 = shl nuw i64 %z1, 32
+ %s2 = shl nuw i64 %z2, 16
+ %or0 = or disjoint i64 %s0, %s1
+ %or1 = or disjoint i64 %s2, %z3
+ %or = or disjoint i64 %or0, %or1
+ ret i64 %or
+}
+
+; Four-way concat of i4 sign-bit masks from <4 x i32> inputs: %v0 in bits
+; [15:12] down to %v3 in bits [3:0] of the i64 result.
+; NOTE(review): the name says v32i32, but 4 x <4 x i32> is only 16 elements;
+; the sibling tests use total-element naming (e.g. v64i8_v16i8 = 4 x 16), so
+; this presumably should be v16i32_v4i32 - confirm against upstream.
+define i64 @movmsk_i64_v32i32_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; CHECK-LABEL: @movmsk_i64_v32i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[C2:%.*]] = icmp slt <4 x i32> [[V2:%.*]], zeroinitializer
+; CHECK-NEXT: [[C3:%.*]] = icmp slt <4 x i32> [[V3:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[B2:%.*]] = bitcast <4 x i1> [[C2]] to i4
+; CHECK-NEXT: [[B3:%.*]] = bitcast <4 x i1> [[C3]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
+; CHECK-NEXT: [[Z2:%.*]] = zext i4 [[B2]] to i64
+; CHECK-NEXT: [[Z3:%.*]] = zext i4 [[B3]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 12
+; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8
+; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 4
+; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
+; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %c2 = icmp slt <4 x i32> %v2, zeroinitializer
+ %c3 = icmp slt <4 x i32> %v3, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %b2 = bitcast <4 x i1> %c2 to i4
+ %b3 = bitcast <4 x i1> %c3 to i4
+ %z0 = zext i4 %b0 to i64
+ %z1 = zext i4 %b1 to i64
+ %z2 = zext i4 %b2 to i64
+ %z3 = zext i4 %b3 to i64
+ %s0 = shl nuw i64 %z0, 12
+ %s1 = shl nuw i64 %z1, 8
+ %s2 = shl nuw i64 %z2, 4
+ %or0 = or disjoint i64 %s0, %s1
+ %or1 = or disjoint i64 %s2, %z3
+ %or = or disjoint i64 %or0, %or1
+ ret i64 %or
+}
+
+; 256-bit input variant: each <32 x i8> sign-bit mask is a full i32, and the
+; two masks fill the i64 exactly - %v0 in bits [63:32], %v1 in bits [31:0].
+define i64 @movmsk_i64_v64i8_v32i8(<32 x i8> %v0, <32 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i64_v64i8_v32i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <32 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <32 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <32 x i1> [[C0]] to i32
+; CHECK-NEXT: [[B1:%.*]] = bitcast <32 x i1> [[C1]] to i32
+; CHECK-NEXT: [[Z0:%.*]] = zext i32 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i32 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 32
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <32 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <32 x i8> %v1, zeroinitializer
+ %b0 = bitcast <32 x i1> %c0 to i32
+ %b1 = bitcast <32 x i1> %c1 to i32
+ %z0 = zext i32 %b0 to i64
+ %z1 = zext i32 %b1 to i64
+ %s0 = shl nuw i64 %z0, 32
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; 256-bit <8 x i32> inputs: the two i8 sign-bit masks occupy bits [15:8] (%v0)
+; and [7:0] (%v1) of the i32 result.
+define i32 @movmsk_i32_v16i32_v8i32(<8 x i32> %v0, <8 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i32_v16i32_v8i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <8 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <8 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <8 x i1> [[C0]] to i8
+; CHECK-NEXT: [[B1:%.*]] = bitcast <8 x i1> [[C1]] to i8
+; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 8
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <8 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <8 x i32> %v1, zeroinitializer
+ %b0 = bitcast <8 x i1> %c0 to i8
+ %b1 = bitcast <8 x i1> %c1 to i8
+ %z0 = zext i8 %b0 to i32
+ %z1 = zext i8 %b1 to i32
+ %s0 = shl nuw i32 %z0, 8
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; Motivating case from issue #111431 (see commit description above): equality
+; masks rather than sign-bit masks - %a0==%a1 packs into the high 32 bits and
+; %a0==%a2 into the low 32 bits of the i64.
+define i64 @PR111431(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+; CHECK-LABEL: @PR111431(
+; CHECK-NEXT: [[C01:%.*]] = icmp eq <32 x i8> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[C02:%.*]] = icmp eq <32 x i8> [[A0]], [[A2:%.*]]
+; CHECK-NEXT: [[B01:%.*]] = bitcast <32 x i1> [[C01]] to i32
+; CHECK-NEXT: [[B02:%.*]] = bitcast <32 x i1> [[C02]] to i32
+; CHECK-NEXT: [[Z01:%.*]] = zext i32 [[B01]] to i64
+; CHECK-NEXT: [[Z02:%.*]] = zext i32 [[B02]] to i64
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[Z01]], 32
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[SHL]], [[Z02]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c01 = icmp eq <32 x i8> %a0, %a1
+ %c02 = icmp eq <32 x i8> %a0, %a2
+ %b01 = bitcast <32 x i1> %c01 to i32
+ %b02 = bitcast <32 x i1> %c02 to i32
+ %z01 = zext i32 %b01 to i64
+ %z02 = zext i32 %b02 to i64
+ %shl = shl nuw i64 %z01, 32
+ %or = or disjoint i64 %shl, %z02
+ ret i64 %or
+}
More information about the llvm-commits
mailing list