[llvm] 3f2cc7c - GlobalISel: Fix lowerSelect handling of boolean high bits

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 12 13:54:15 PDT 2022


Author: Matt Arsenault
Date: 2022-04-12T16:54:03-04:00
New Revision: 3f2cc7cc2b17ea6b7c09470ffc75b4691ce3ea95

URL: https://github.com/llvm/llvm-project/commit/3f2cc7cc2b17ea6b7c09470ffc75b4691ce3ea95
DIFF: https://github.com/llvm/llvm-project/commit/3f2cc7cc2b17ea6b7c09470ffc75b4691ce3ea95.diff

LOG: GlobalISel: Fix lowerSelect handling of boolean high bits

This was making several invalid assumptions about the incoming
select. First, it was assuming the incoming condition was either s1 or
already sign extended, not accounting for the different boolean
high-bits behavior between scalar and vector conditions. A vector
boolean only appeared because the old lowering took an intermediate
step through a vector select, which is now avoided.
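
As a minimal sketch of the high-bits problem (hypothetical register
names, following the pattern the new tests check): on a target whose
scalar booleans are zero extended, a promoted condition holds 0 or 1,
but a vector lane mask needs 0 or -1, so the boolean must be
re-sign-extended from its low bit before it can be used as a mask:

    ; hypothetical MIR; assumes a zero-extended scalar boolean in s32
    %cond:_(s32) = G_ICMP intpred(eq), %x(s32), %y  ; produces 0 or 1
    %mask:_(s32) = G_SEXT_INREG %cond, 1            ; now 0 or -1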

Second, it was assuming it could use the result vector type as a
boolean mask. These types have nothing to do with each other, and the
mask type only makes sense in the context of the expansion to bit
operations. Since these are logically part of the same lowering, do
the complete expansion in a single step.
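
As a rough sketch (hypothetical register names; this mirrors the
sequence the new tests check), the complete single-step expansion of
%dst:_(<4 x s32>) = G_SELECT %cond:_(s32), %a, %b looks like:

    ; splat the sign-extended boolean into a full-width lane mask
    %inreg:_(s32) = G_SEXT_INREG %cond, 1
    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
    %zero:_(s64) = G_CONSTANT i64 0
    %ins:_(<4 x s32>) = G_INSERT_VECTOR_ELT %undef, %inreg(s32), %zero(s64)
    %mask:_(<4 x s32>) = G_SHUFFLE_VECTOR %ins(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
    ; select as bit operations: (a & mask) | (b & ~mask)
    %m1:_(s32) = G_CONSTANT i32 -1
    %ones:_(<4 x s32>) = G_BUILD_VECTOR %m1(s32), %m1(s32), %m1(s32), %m1(s32)
    %notmask:_(<4 x s32>) = G_XOR %mask, %ones
    %tsel:_(<4 x s32>) = G_AND %a, %mask
    %fsel:_(<4 x s32>) = G_AND %b, %notmask
    %dst:_(<4 x s32>) = G_OR %tsel, %fsel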

The added select_v4s1_s1 test does fail to legalize, since AArch64's
vector legalization support appears to be fairly incomplete.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 370a13e599e96..0c4de3ae63a74 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7257,25 +7257,32 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
   Register Op2Reg = MI.getOperand(3).getReg();
   LLT DstTy = MRI.getType(DstReg);
   LLT MaskTy = MRI.getType(MaskReg);
-  LLT Op1Ty = MRI.getType(Op1Reg);
   if (!DstTy.isVector())
     return UnableToLegalize;
 
-  // Vector selects can have a scalar predicate. If so, splat into a vector and
-  // finish for later legalization attempts to try again.
   if (MaskTy.isScalar()) {
+    // Turn the scalar condition into a vector condition mask.
+
     Register MaskElt = MaskReg;
-    if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
-      MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
-    // Generate a vector splat idiom to be pattern matched later.
+
+    // The condition was potentially zero extended before, but we want a sign
+    // extended boolean.
+    if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
+        MaskTy != LLT::scalar(1)) {
+      MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
+    }
+
+    // Continue the sign extension (or truncate) to match the data type.
+    MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+                                          MaskElt).getReg(0);
+
+    // Generate a vector splat idiom.
     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
-    Observer.changingInstr(MI);
-    MI.getOperand(1).setReg(ShufSplat.getReg(0));
-    Observer.changedInstr(MI);
-    return Legalized;
+    MaskReg = ShufSplat.getReg(0);
+    MaskTy = DstTy;
   }
 
-  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
     return UnableToLegalize;
   }
 

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
index 5fbc1b256b62d..d329653950506 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -185,3 +185,150 @@ body:             |
     %trunc:_(s64) = G_TRUNC %select
     $x0 = COPY %trunc
     RET_ReallyLR implicit $x0
+...
+
+# The select condition has already been zero extended to s32, and
+# needs a sext_inreg to get a vector boolean.
+---
+name:            scalar_mask_already_promoted_select_s32_v4s32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_already_promoted_select_s32_v4s32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY2]](s32), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s32>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<4 x s32>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+
+# The scalar select condition was zero extended to s32, to a different
+# type from the vector width. It needs to be sign extended inreg, and
+# then sign extended to the full element width.
+
+---
+name:            scalar_mask_select_s32_v4s64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: scalar_mask_select_s32_v4s64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP]], 1
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[SEXT]](s64), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: $q0 = COPY [[OR]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<2 x s64>) = COPY $q0
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s64) = G_FCONSTANT double 0.000000e+00
+    %5:_(<2 x s64>) = G_BUILD_VECTOR %6, %6
+    %3:_(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %4:_(<2 x s64>) = G_SELECT %3(s32), %1, %5
+    $q0 = COPY %4
+    RET_ReallyLR implicit $q0
+
+...
+
+# Check degenerate case where the selected element size is the same as
+# the condition bitwidth.
+---
+name:            select_v4s1_s1
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0, $q1, $q2, $w0
+
+    ; CHECK-LABEL: name: select_v4s1_s1
+    ; CHECK: liveins: $q0, $q1, $q2, $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %w0:_(s32) = COPY $w0
+    ; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
+    ; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
+    ; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s1) = COPY %cmp(s1)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s1), [[C2]](s64)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[C3]](s1), [[C3]](s1), [[C3]](s1), [[C3]](s1)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s1>) = G_XOR [[SHUF]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond0, [[SHUF]]
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s1>) = G_AND %vec_cond1, [[XOR]]
+    ; CHECK-NEXT: %select:_(<4 x s1>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
+    ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %w0:_(s32) = COPY $w0
+    %q0:_(<4 x s32>) = COPY $q0
+    %q1:_(<4 x s32>) = COPY $q1
+    %q2:_(<4 x s32>) = COPY $q2
+    %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q1
+    %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0, %q2
+    %2:_(s32) = G_CONSTANT i32 4100
+    %6:_(s32) = G_FCONSTANT float 0.000000e+00
+    %5:_(<4 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32), %6(s32), %6(s32)
+    %cmp:_(s1) = G_ICMP intpred(eq), %w0, %2
+    %select:_(<4 x s1>) = G_SELECT %cmp, %vec_cond0, %vec_cond1
+    %zext_select:_(<4 x s32>) = G_ZEXT %select
+    $q0 = COPY %zext_select
+    RET_ReallyLR implicit $q0
+
+...
