[llvm] 1e4d882 - [InstCombine] matchFunnelShift - add support for non-uniform vectors containing undefs.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 14 02:43:52 PDT 2020


Author: Simon Pilgrim
Date: 2020-10-14T10:42:27+01:00
New Revision: 1e4d882f9afe3fa5d0085f056b56eec21eb2c087

URL: https://github.com/llvm/llvm-project/commit/1e4d882f9afe3fa5d0085f056b56eec21eb2c087
DIFF: https://github.com/llvm/llvm-project/commit/1e4d882f9afe3fa5d0085f056b56eec21eb2c087.diff

LOG: [InstCombine] matchFunnelShift - add support for non-uniform vectors containing undefs.

Replace m_SpecificInt with m_APIntAllowUndef to matching splats containing undefs, then use ConstantExpr::mergeUndefsWith to merge the undefs together in the result.

The undef funnel shift amounts are getting replaced with zero later on - I'll address this in a later patch, otherwise we lose potential shift by splat value patterns.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/funnel.ll
    llvm/test/Transforms/InstCombine/rotate.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index fb415bf20d1a..7f5f05d04139 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2090,15 +2090,14 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
       if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
         return ConstantInt::get(L->getType(), *LI);
 
-    // TODO: Support undefs in non-uniform shift amounts.
+    const APInt *SumC;
     Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && !LC->containsUndefElement() &&
-        match(R, m_Constant(RC)) && !RC->containsUndefElement() &&
+    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
         match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width)))) {
-      if (match(ConstantExpr::getAdd(LC, RC), m_SpecificInt(Width)))
-        return L;
-    }
+        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+        match(ConstantExpr::getAdd(LC, RC), m_APIntAllowUndef(SumC)) &&
+        *SumC == Width)
+      return ConstantExpr::mergeUndefsWith(LC, RC);
 
     // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
     // We limit this to X < Width in case the backend re-expands the intrinsic,

diff  --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll
index 7277681a1088..5c9427973430 100644
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -101,7 +101,7 @@ define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) {
 }
 
 ; Allow arbitrary shift constants.
-; TODO: Support undef elements.
+; Support undef elements.
 
 define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat(
@@ -116,9 +116,7 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef0(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 undef, i32 19>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 15, i32 13>
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 0, i32 13>)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shr = lshr <2 x i32> %x, <i32 undef, i32 19>
@@ -129,9 +127,7 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y
 
 define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef1(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 17, i32 19>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 15, i32 undef>
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 0>)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shr = lshr <2 x i32> %x, <i32 17, i32 19>
@@ -153,9 +149,7 @@ define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) {
 
 define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y) {
 ; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_undef0(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i36> [[X:%.*]], <i36 21, i36 11, i36 undef>
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i36> [[Y:%.*]], <i36 15, i36 25, i36 undef>
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i36> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x i36> <i36 21, i36 11, i36 0>)
 ; CHECK-NEXT:    ret <3 x i36> [[R]]
 ;
   %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef>

diff  --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 363750ea13a1..270d778a3091 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -123,7 +123,7 @@ define <2 x i17> @rotr_v2i17_constant_splat_undef1(<2 x i17> %x) {
 }
 
 ; Allow arbitrary shift constants.
-; TODO: Support undef elements.
+; Support undef elements.
 
 define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat(
@@ -138,9 +138,7 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
 
 define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) {
 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef0(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 undef, i32 19>
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], <i32 15, i32 13>
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 0, i32 19>)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shl = shl <2 x i32> %x, <i32 undef, i32 19>
@@ -151,9 +149,7 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) {
 
 define <2 x i32> @rotr_v2i32_constant_nonsplat_undef1(<2 x i32> %x) {
 ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef1(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 17, i32 19>
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], <i32 15, i32 undef>
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shl = shl <2 x i32> %x, <i32 17, i32 19>
@@ -175,9 +171,7 @@ define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
 
 define <3 x i36> @rotl_v3i36_constant_nonsplat_undef0(<3 x i36> %x) {
 ; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_undef0(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i36> [[X:%.*]], <i36 21, i36 11, i36 undef>
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i36> [[X]], <i36 15, i36 25, i36 undef>
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i36> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 0>)
 ; CHECK-NEXT:    ret <3 x i36> [[R]]
 ;
   %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef>


        


More information about the llvm-commits mailing list