[llvm] InstCombine: fix transformation of bitwiseAnd to fabs (PR #71257)

Fri Nov 3 17:07:36 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Romaric Jodin (rjodinchr)

<details>
<summary>Changes</summary>

Since 5c0da5839de1, InstCombine transforms the pattern that masks off the MSB (the sign bit) of a bitcast floating-point value into a call to fabs.

While this might be correct in C, it is not in OpenCL, where math operations are allowed to flush denormalized numbers to zero. As a result, what should only clear the MSB can end up changing other bits of the value as well.

This is an issue for libclc, whose implementations use this pattern and rely on the fact that only the MSB is masked. Using fabs instead breaks conformance on platforms where fabs flushes denormalized numbers to zero.

Instead, only perform the transformation when the complete pattern (including the bitcast back to floating point) is present:
```
bitcast float to int
bitwiseand
bitcast int to float
=>
fabs
```

Fix #69464

---
Full diff: https://github.com/llvm/llvm-project/pull/71257.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (-22) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp (+26) 
- (modified) llvm/test/Transforms/InstCombine/fabs-as-int.ll (+29-27) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll (+6-4) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll (+6-4) 


``````````diff

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 070d386b2f18d24..306f058037e416a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2396,28 +2396,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     }
   }
 
-  // If we are clearing the sign bit of a floating-point value, convert this to
-  // fabs, then cast back to integer.
-  //
-  // This is a generous interpretation for noimplicitfloat, this is not a true
-  // floating-point operation.
-  //
-  // Assumes any IEEE-represented type has the sign bit in the high bit.
-  // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
-  Value *CastOp;
-  if (match(Op0, m_BitCast(m_Value(CastOp))) &&
-      match(Op1, m_MaxSignedValue()) &&
-      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
-        Attribute::NoImplicitFloat)) {
-    Type *EltTy = CastOp->getType()->getScalarType();
-    if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
-        EltTy->getPrimitiveSizeInBits() ==
-        I.getType()->getScalarType()->getPrimitiveSizeInBits()) {
-      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
-      return new BitCastInst(FAbs, I.getType());
-    }
-  }
-
   if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
                       m_SignMask())) &&
       match(Y, m_SpecificInt_ICMP(
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2127000c4b780b2..8461de8fdbce758 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2546,6 +2546,32 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
   if (DestTy == Src->getType())
     return replaceInstUsesWith(CI, Src);
 
+  // If we are clearing the sign bit of a floating-point value, convert this to
+  // fabs
+  //
+  // This is a generous interpretation for noimplicitfloat, this is not a true
+  // floating-point operation.
+  //
+  // Assumes any IEEE-represented type has the sign bit in the high bit.
+  if (!Builder.GetInsertBlock()->getParent()->hasFnAttribute(
+          Attribute::NoImplicitFloat) &&
+      DestTy->isFloatingPointTy() && DestTy->isIEEE()) {
+    Value *L, *R;
+    if (match(Src, m_And(m_Value(L), m_Value(R)))) {
+      Value *Cast;
+      if (match(L, m_BitCast(m_Value(Cast))) && match(R, m_MaxSignedValue())) {
+        Type *EltTy = Cast->getType()->getScalarType();
+        if (EltTy->isFloatingPointTy() && EltTy->isIEEE() &&
+            EltTy->getPrimitiveSizeInBits() ==
+                Src->getType()->getScalarType()->getPrimitiveSizeInBits()) {
+          return CallInst::Create(Intrinsic::getDeclaration(
+                                      CI.getModule(), Intrinsic::fabs, DestTy),
+                                  {Cast});
+        }
+      }
+    }
+  }
+
   if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
     // Beware: messing with this target-specific oddity may cause trouble.
     if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
index f32c00e453f22d9..d1543ec1e06f186 100644
--- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
@@ -54,9 +54,11 @@ define float @not_fabs_as_int_f32_castback_wrongconst(float %val) {
 define float @fabs_as_int_f32_castback_multi_use(float %val, ptr %ptr) {
 ; CHECK-LABEL: define float @fabs_as_int_f32_castback_multi_use
 ; CHECK-SAME: (float [[VAL:%.*]], ptr [[PTR:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[VAL]])
-; CHECK-NEXT:    store float [[TMP1]], ptr [[PTR]], align 4
-; CHECK-NEXT:    ret float [[TMP1]]
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[BITCAST]], 2147483647
+; CHECK-NEXT:    store i32 [[AND]], ptr [[PTR]], align 4
+; CHECK-NEXT:    [[FABS:%.*]] = call float @llvm.fabs.f32(float [[VAL]])
+; CHECK-NEXT:    ret float [[FABS]]
 ;
   %bitcast = bitcast float %val to i32
   %and = and i32 %bitcast, 2147483647
@@ -68,8 +70,8 @@ define float @fabs_as_int_f32_castback_multi_use(float %val, ptr %ptr) {
 define i64 @fabs_as_int_f64(double %x) {
 ; CHECK-LABEL: define i64 @fabs_as_int_f64
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast double [[TMP1]] to i64
+; CHECK-NEXT:    [[BC:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[BC]], 9223372036854775807
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
   %bc = bitcast double %x to i64
@@ -80,8 +82,8 @@ define i64 @fabs_as_int_f64(double %x) {
 define <2 x i64> @fabs_as_int_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x i64> @fabs_as_int_v2f64
 ; CHECK-SAME: (<2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast <2 x double> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[BC]], <i64 9223372036854775807, i64 9223372036854775807>
 ; CHECK-NEXT:    ret <2 x i64> [[AND]]
 ;
   %bc = bitcast <2 x double> %x to <2 x i64>
@@ -92,8 +94,8 @@ define <2 x i64> @fabs_as_int_v2f64(<2 x double> %x) {
 define i64 @fabs_as_int_f64_swap(double %x) {
 ; CHECK-LABEL: define i64 @fabs_as_int_f64_swap
 ; CHECK-SAME: (double [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast double [[TMP1]] to i64
+; CHECK-NEXT:    [[BC:%.*]] = bitcast double [[X]] to i64
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[BC]], 9223372036854775807
 ; CHECK-NEXT:    ret i64 [[AND]]
 ;
   %bc = bitcast double %x to i64
@@ -104,8 +106,8 @@ define i64 @fabs_as_int_f64_swap(double %x) {
 define i32 @fabs_as_int_f32(float %x) {
 ; CHECK-LABEL: define i32 @fabs_as_int_f32
 ; CHECK-SAME: (float [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast float [[TMP1]] to i32
+; CHECK-NEXT:    [[BC:%.*]] = bitcast float [[X]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[BC]], 2147483647
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %bc = bitcast float %x to i32
@@ -116,8 +118,8 @@ define i32 @fabs_as_int_f32(float %x) {
 define <2 x i32> @fabs_as_int_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x i32> @fabs_as_int_v2f32
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[BC]], <i32 2147483647, i32 2147483647>
 ; CHECK-NEXT:    ret <2 x i32> [[AND]]
 ;
   %bc = bitcast <2 x float> %x to <2 x i32>
@@ -140,8 +142,8 @@ define <2 x i32> @not_fabs_as_int_v2f32_nonsplat(<2 x float> %x) {
 define <3 x i32> @fabs_as_int_v3f32_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x i32> @fabs_as_int_v3f32_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x float> @llvm.fabs.v3f32(<3 x float> [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast <3 x float> [[TMP1]] to <3 x i32>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <3 x float> [[X]] to <3 x i32>
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i32> [[BC]], <i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <3 x i32> [[AND]]
 ;
   %bc = bitcast <3 x float> %x to <3 x i32>
@@ -193,8 +195,8 @@ define float @not_fabs_as_int_f32_bitcast_from_v2i16(<2 x i16> %val) {
 define i128 @fabs_as_int_fp128_f64_mask(fp128 %x) {
 ; CHECK-LABEL: define i128 @fabs_as_int_fp128_f64_mask
 ; CHECK-SAME: (fp128 [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fp128 @llvm.fabs.f128(fp128 [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast fp128 [[TMP1]] to i128
+; CHECK-NEXT:    [[BC:%.*]] = bitcast fp128 [[X]] to i128
+; CHECK-NEXT:    [[AND:%.*]] = and i128 [[BC]], 170141183460469231731687303715884105727
 ; CHECK-NEXT:    ret i128 [[AND]]
 ;
   %bc = bitcast fp128 %x to i128
@@ -205,8 +207,8 @@ define i128 @fabs_as_int_fp128_f64_mask(fp128 %x) {
 define i128 @fabs_as_int_fp128_f128_mask(fp128 %x) {
 ; CHECK-LABEL: define i128 @fabs_as_int_fp128_f128_mask
 ; CHECK-SAME: (fp128 [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fp128 @llvm.fabs.f128(fp128 [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast fp128 [[TMP1]] to i128
+; CHECK-NEXT:    [[BC:%.*]] = bitcast fp128 [[X]] to i128
+; CHECK-NEXT:    [[AND:%.*]] = and i128 [[BC]], 170141183460469231731687303715884105727
 ; CHECK-NEXT:    ret i128 [[AND]]
 ;
   %bc = bitcast fp128 %x to i128
@@ -217,8 +219,8 @@ define i128 @fabs_as_int_fp128_f128_mask(fp128 %x) {
 define i16 @fabs_as_int_f16(half %x) {
 ; CHECK-LABEL: define i16 @fabs_as_int_f16
 ; CHECK-SAME: (half [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call half @llvm.fabs.f16(half [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast half [[TMP1]] to i16
+; CHECK-NEXT:    [[BC:%.*]] = bitcast half [[X]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[BC]], 32767
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
   %bc = bitcast half %x to i16
@@ -229,8 +231,8 @@ define i16 @fabs_as_int_f16(half %x) {
 define <2 x i16> @fabs_as_int_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x i16> @fabs_as_int_v2f16
 ; CHECK-SAME: (<2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast <2 x half> [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i16> [[BC]], <i16 32767, i16 32767>
 ; CHECK-NEXT:    ret <2 x i16> [[AND]]
 ;
   %bc = bitcast <2 x half> %x to <2 x i16>
@@ -241,8 +243,8 @@ define <2 x i16> @fabs_as_int_v2f16(<2 x half> %x) {
 define i16 @fabs_as_int_bf16(bfloat %x) {
 ; CHECK-LABEL: define i16 @fabs_as_int_bf16
 ; CHECK-SAME: (bfloat [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call bfloat @llvm.fabs.bf16(bfloat [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast bfloat [[TMP1]] to i16
+; CHECK-NEXT:    [[BC:%.*]] = bitcast bfloat [[X]] to i16
+; CHECK-NEXT:    [[AND:%.*]] = and i16 [[BC]], 32767
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;
   %bc = bitcast bfloat %x to i16
@@ -253,8 +255,8 @@ define i16 @fabs_as_int_bf16(bfloat %x) {
 define <2 x i16> @fabs_as_int_v2bf16(<2 x bfloat> %x) {
 ; CHECK-LABEL: define <2 x i16> @fabs_as_int_v2bf16
 ; CHECK-SAME: (<2 x bfloat> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> [[X]])
-; CHECK-NEXT:    [[AND:%.*]] = bitcast <2 x bfloat> [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x bfloat> [[X]] to <2 x i16>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i16> [[BC]], <i16 32767, i16 32767>
 ; CHECK-NEXT:    ret <2 x i16> [[AND]]
 ;
   %bc = bitcast <2 x bfloat> %x to <2 x i16>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index e24c52ba81ddf46..d818f0f11167019 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -76,10 +76,12 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
 
 define <8 x float> @fneg_fabs(<8 x float> %a) {
 ; CHECK-LABEL: @fneg_fabs(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
-; CHECK-NEXT:    [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <8 x float> [[DOTUNCASTED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg <8 x float> [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 poison, i32 poison, i32 poison, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP3]] to <8 x float>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    ret <8 x float> [[TMP5]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
   %a1 = extractelement <8 x float> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 0f8751a6da7f578..7ab02f55fc92710 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -76,10 +76,12 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) {
 
 define <8 x float> @fneg_fabs(<8 x float> %a) {
 ; CHECK-LABEL: @fneg_fabs(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg <8 x float> [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[A]])
-; CHECK-NEXT:    [[DOTUNCASTED:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <8 x float> [[DOTUNCASTED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg <8 x float> [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 poison, i32 poison, i32 poison, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP3]] to <8 x float>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    ret <8 x float> [[TMP5]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
   %a1 = extractelement <8 x float> %a, i32 1

``````````

</details>


https://github.com/llvm/llvm-project/pull/71257