[llvm] c5a21c1 - [PhaseOrdering][X86] Add test coverage based off #111431
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 09:31:20 PST 2024
Author: Simon Pilgrim
Date: 2024-12-10T17:31:08Z
New Revision: c5a21c115856298fcc04f343f573965da7b14af4
URL: https://github.com/llvm/llvm-project/commit/c5a21c115856298fcc04f343f573965da7b14af4
DIFF: https://github.com/llvm/llvm-project/commit/c5a21c115856298fcc04f343f573965da7b14af4.diff
LOG: [PhaseOrdering][X86] Add test coverage based off #111431
Add tests for the concatenation of boolean vectors bitcast to integers - similar to the MOVMSK pattern.
Added:
llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll b/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
new file mode 100644
index 00000000000000..07bfbffa9518fa
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/concat-boolmasks.ll
@@ -0,0 +1,252 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s
+; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+; Concatenate two MOVMSK-style masks into an i32: each icmp-slt-zero collects
+; the per-element sign bits of a <16 x i8> into a <16 x i1>, bitcast packs that
+; into an i16 mask, and %v0's mask is shifted into the high 16 bits while
+; %v1's mask fills the low 16 bits (or disjoint - the halves never overlap).
+define i32 @movmsk_i32_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i32_v32i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i32 [[Z0]], 16
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %z0 = zext i16 %b0 to i32
+ %z1 = zext i16 %b1 to i32
+ %s0 = shl nuw i32 %z0, 16
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; Same concat pattern with <4 x i32> inputs: each sign-bit mask is only an i4,
+; so the two 4-bit masks occupy bits [7:4] (%v0) and [3:0] (%v1) of the i32.
+define i32 @movmsk_i32_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i32_v8i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 4
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %z0 = zext i4 %b0 to i32
+ %z1 = zext i4 %b1 to i32
+ %s0 = shl nuw i32 %z0, 4
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; As movmsk_i32_v32i8_v16i8 but widened to an i64 result: the two i16
+; sign-bit masks land in bits [31:16] (%v0) and [15:0] (%v1); the upper 32
+; bits of the result are always zero.
+define i64 @movmsk_i64_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i64_v32i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 16
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %z0 = zext i16 %b0 to i64
+ %z1 = zext i16 %b1 to i64
+ %s0 = shl nuw i64 %z0, 16
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; As movmsk_i32_v8i32_v4i32 but with an i64 result: two i4 sign-bit masks
+; concatenated into bits [7:4] (%v0) and [3:0] (%v1).
+define i64 @movmsk_i64_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i64_v8i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 4
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %z0 = zext i4 %b0 to i64
+ %z1 = zext i4 %b1 to i64
+ %s0 = shl nuw i64 %z0, 4
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; Four-way concat: four i16 sign-bit masks from <16 x i8> inputs fill the full
+; i64, with %v0 in bits [63:48] down to %v3 in bits [15:0]. The OR tree is
+; reassociated relative to the two-input tests to cover a different shape.
+define i64 @movmsk_i64_v64i8_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
+; CHECK-LABEL: @movmsk_i64_v64i8_v16i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <16 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <16 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[C2:%.*]] = icmp slt <16 x i8> [[V2:%.*]], zeroinitializer
+; CHECK-NEXT: [[C3:%.*]] = icmp slt <16 x i8> [[V3:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <16 x i1> [[C0]] to i16
+; CHECK-NEXT: [[B1:%.*]] = bitcast <16 x i1> [[C1]] to i16
+; CHECK-NEXT: [[B2:%.*]] = bitcast <16 x i1> [[C2]] to i16
+; CHECK-NEXT: [[B3:%.*]] = bitcast <16 x i1> [[C3]] to i16
+; CHECK-NEXT: [[Z0:%.*]] = zext i16 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i16 [[B1]] to i64
+; CHECK-NEXT: [[Z2:%.*]] = zext i16 [[B2]] to i64
+; CHECK-NEXT: [[Z3:%.*]] = zext i16 [[B3]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 48
+; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 32
+; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 16
+; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
+; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <16 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <16 x i8> %v1, zeroinitializer
+ %c2 = icmp slt <16 x i8> %v2, zeroinitializer
+ %c3 = icmp slt <16 x i8> %v3, zeroinitializer
+ %b0 = bitcast <16 x i1> %c0 to i16
+ %b1 = bitcast <16 x i1> %c1 to i16
+ %b2 = bitcast <16 x i1> %c2 to i16
+ %b3 = bitcast <16 x i1> %c3 to i16
+ %z0 = zext i16 %b0 to i64
+ %z1 = zext i16 %b1 to i64
+ %z2 = zext i16 %b2 to i64
+ %z3 = zext i16 %b3 to i64
+ %s0 = shl nuw i64 %z0, 48
+ %s1 = shl nuw i64 %z1, 32
+ %s2 = shl nuw i64 %z2, 16
+ %or0 = or disjoint i64 %s0, %s1
+ %or1 = or disjoint i64 %s2, %z3
+ %or = or disjoint i64 %or0, %or1
+ ret i64 %or
+}
+
+; Four-way concat of i4 sign-bit masks from <4 x i32> inputs: %v0 in bits
+; [15:12] down to %v3 in bits [3:0] of the i64 result.
+; NOTE(review): the name says v32i32, but 4 x <4 x i32> is only 16 elements;
+; the sibling tests use total-element naming (e.g. v64i8_v16i8 = 4 x 16), so
+; this presumably should be v16i32_v4i32 - confirm against upstream.
+define i64 @movmsk_i64_v32i32_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; CHECK-LABEL: @movmsk_i64_v32i32_v4i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <4 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <4 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[C2:%.*]] = icmp slt <4 x i32> [[V2:%.*]], zeroinitializer
+; CHECK-NEXT: [[C3:%.*]] = icmp slt <4 x i32> [[V3:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <4 x i1> [[C0]] to i4
+; CHECK-NEXT: [[B1:%.*]] = bitcast <4 x i1> [[C1]] to i4
+; CHECK-NEXT: [[B2:%.*]] = bitcast <4 x i1> [[C2]] to i4
+; CHECK-NEXT: [[B3:%.*]] = bitcast <4 x i1> [[C3]] to i4
+; CHECK-NEXT: [[Z0:%.*]] = zext i4 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i4 [[B1]] to i64
+; CHECK-NEXT: [[Z2:%.*]] = zext i4 [[B2]] to i64
+; CHECK-NEXT: [[Z3:%.*]] = zext i4 [[B3]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i64 [[Z0]], 12
+; CHECK-NEXT: [[S1:%.*]] = shl nuw nsw i64 [[Z1]], 8
+; CHECK-NEXT: [[S2:%.*]] = shl nuw nsw i64 [[Z2]], 4
+; CHECK-NEXT: [[OR0:%.*]] = or disjoint i64 [[S1]], [[S0]]
+; CHECK-NEXT: [[OR1:%.*]] = or disjoint i64 [[S2]], [[Z3]]
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[OR1]], [[OR0]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <4 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <4 x i32> %v1, zeroinitializer
+ %c2 = icmp slt <4 x i32> %v2, zeroinitializer
+ %c3 = icmp slt <4 x i32> %v3, zeroinitializer
+ %b0 = bitcast <4 x i1> %c0 to i4
+ %b1 = bitcast <4 x i1> %c1 to i4
+ %b2 = bitcast <4 x i1> %c2 to i4
+ %b3 = bitcast <4 x i1> %c3 to i4
+ %z0 = zext i4 %b0 to i64
+ %z1 = zext i4 %b1 to i64
+ %z2 = zext i4 %b2 to i64
+ %z3 = zext i4 %b3 to i64
+ %s0 = shl nuw i64 %z0, 12
+ %s1 = shl nuw i64 %z1, 8
+ %s2 = shl nuw i64 %z2, 4
+ %or0 = or disjoint i64 %s0, %s1
+ %or1 = or disjoint i64 %s2, %z3
+ %or = or disjoint i64 %or0, %or1
+ ret i64 %or
+}
+
+; 256-bit input variant: each <32 x i8> sign-bit mask is a full i32, and the
+; two masks fill the i64 exactly - %v0 in bits [63:32], %v1 in bits [31:0].
+define i64 @movmsk_i64_v64i8_v32i8(<32 x i8> %v0, <32 x i8> %v1) {
+; CHECK-LABEL: @movmsk_i64_v64i8_v32i8(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <32 x i8> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <32 x i8> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <32 x i1> [[C0]] to i32
+; CHECK-NEXT: [[B1:%.*]] = bitcast <32 x i1> [[C1]] to i32
+; CHECK-NEXT: [[Z0:%.*]] = zext i32 [[B0]] to i64
+; CHECK-NEXT: [[Z1:%.*]] = zext i32 [[B1]] to i64
+; CHECK-NEXT: [[S0:%.*]] = shl nuw i64 [[Z0]], 32
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c0 = icmp slt <32 x i8> %v0, zeroinitializer
+ %c1 = icmp slt <32 x i8> %v1, zeroinitializer
+ %b0 = bitcast <32 x i1> %c0 to i32
+ %b1 = bitcast <32 x i1> %c1 to i32
+ %z0 = zext i32 %b0 to i64
+ %z1 = zext i32 %b1 to i64
+ %s0 = shl nuw i64 %z0, 32
+ %or = or disjoint i64 %s0, %z1
+ ret i64 %or
+}
+
+; 256-bit <8 x i32> inputs: the two i8 sign-bit masks occupy bits [15:8] (%v0)
+; and [7:0] (%v1) of the i32 result.
+define i32 @movmsk_i32_v16i32_v8i32(<8 x i32> %v0, <8 x i32> %v1) {
+; CHECK-LABEL: @movmsk_i32_v16i32_v8i32(
+; CHECK-NEXT: [[C0:%.*]] = icmp slt <8 x i32> [[V0:%.*]], zeroinitializer
+; CHECK-NEXT: [[C1:%.*]] = icmp slt <8 x i32> [[V1:%.*]], zeroinitializer
+; CHECK-NEXT: [[B0:%.*]] = bitcast <8 x i1> [[C0]] to i8
+; CHECK-NEXT: [[B1:%.*]] = bitcast <8 x i1> [[C1]] to i8
+; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[B0]] to i32
+; CHECK-NEXT: [[Z1:%.*]] = zext i8 [[B1]] to i32
+; CHECK-NEXT: [[S0:%.*]] = shl nuw nsw i32 [[Z0]], 8
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[S0]], [[Z1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %c0 = icmp slt <8 x i32> %v0, zeroinitializer
+ %c1 = icmp slt <8 x i32> %v1, zeroinitializer
+ %b0 = bitcast <8 x i1> %c0 to i8
+ %b1 = bitcast <8 x i1> %c1 to i8
+ %z0 = zext i8 %b0 to i32
+ %z1 = zext i8 %b1 to i32
+ %s0 = shl nuw i32 %z0, 8
+ %or = or disjoint i32 %s0, %z1
+ ret i32 %or
+}
+
+; Motivating case from issue #111431 (see commit description above): equality
+; masks rather than sign-bit masks - %a0==%a1 packs into the high 32 bits and
+; %a0==%a2 into the low 32 bits of the i64.
+define i64 @PR111431(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+; CHECK-LABEL: @PR111431(
+; CHECK-NEXT: [[C01:%.*]] = icmp eq <32 x i8> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[C02:%.*]] = icmp eq <32 x i8> [[A0]], [[A2:%.*]]
+; CHECK-NEXT: [[B01:%.*]] = bitcast <32 x i1> [[C01]] to i32
+; CHECK-NEXT: [[B02:%.*]] = bitcast <32 x i1> [[C02]] to i32
+; CHECK-NEXT: [[Z01:%.*]] = zext i32 [[B01]] to i64
+; CHECK-NEXT: [[Z02:%.*]] = zext i32 [[B02]] to i64
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[Z01]], 32
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[SHL]], [[Z02]]
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %c01 = icmp eq <32 x i8> %a0, %a1
+ %c02 = icmp eq <32 x i8> %a0, %a2
+ %b01 = bitcast <32 x i1> %c01 to i32
+ %b02 = bitcast <32 x i1> %c02 to i32
+ %z01 = zext i32 %b01 to i64
+ %z02 = zext i32 %b02 to i64
+ %shl = shl nuw i64 %z01, 32
+ %or = or disjoint i64 %shl, %z02
+ ret i64 %or
+}
More information about the llvm-commits
mailing list