[llvm] f60f7b4 - [InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 09:00:01 PDT 2024


Author: Simon Pilgrim
Date: 2024-07-03T16:59:52+01:00
New Revision: f60f7b47a92dbf2f3d994868d3cd43b86a18a76c

URL: https://github.com/llvm/llvm-project/commit/f60f7b47a92dbf2f3d994868d3cd43b86a18a76c
DIFF: https://github.com/llvm/llvm-project/commit/f60f7b47a92dbf2f3d994868d3cd43b86a18a76c.diff

LOG: [InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics

MULH/PMULHU simplifies to ASHR/ZERO as they just become a SEXT/ZEXT sign-splat instruction

PMULHRS doesn't simplify as much so I've not attempted to fold it.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
    llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
    llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 6d4734d477b3e..163584b3750d3 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -521,6 +521,16 @@ static Value *simplifyX86pmulh(IntrinsicInst &II,
   if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
     return ConstantAggregateZero::get(ResTy);
 
+  // Multiply by one.
+  if (!IsRounding) {
+    if (match(Arg0, PatternMatch::m_One()))
+      return IsSigned ? Builder.CreateAShr(Arg1, 15)
+                      : ConstantAggregateZero::get(ResTy);
+    if (match(Arg1, PatternMatch::m_One()))
+      return IsSigned ? Builder.CreateAShr(Arg0, 15)
+                      : ConstantAggregateZero::get(ResTy);
+  }
+
   // Constant folding.
   if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
     return nullptr;

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
index 699a3c9198e8a..185ab46deed89 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
@@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
@@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
@@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
index e970ae6080612..b18833f703a5f 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
@@ -111,8 +111,7 @@ define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
 
 define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <8 x i16> %1
@@ -120,8 +119,7 @@ define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
 
 define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_128_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
   ret <8 x i16> %1
@@ -129,8 +127,7 @@ define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
 
 define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <16 x i16> %1
@@ -138,8 +135,7 @@ define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
 
 define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_256_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
   ret <16 x i16> %1
@@ -147,8 +143,7 @@ define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
 
 define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
   ret <32 x i16> %1
@@ -156,8 +151,7 @@ define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
 
 define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulhu_512_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
   ret <32 x i16> %1


        


More information about the llvm-commits mailing list