[llvm] [llvm][InstCombine] bitcast bfloat half castpair bug (PR #79832)

Tue Jan 30 02:15:37 PST 2024

https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/79832

>From 4f7b10f1f3e5c1b8a729a2f0afe83cb8eeefb21a Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 26 Jan 2024 14:30:16 +0000
Subject: [PATCH] [llvm][InstCombine] bitcast bfloat half castpair bug

Miscompilation arises due to instruction combining of cast pairs
of the type `bitcast bfloat to half` + `<FPOp> half to <Ty>` or
`bitcast half to bfloat` + `<FPOp> bfloat to <Ty>`. For example,
`bitcast bfloat to half` + `fpext half to double` and
`bitcast half to bfloat` + `fpext bfloat to double` are respectively
reduced to `fpext bfloat to double` and `fpext half to double`.
This is an incorrect conversion, as it assumes the representations
of `bfloat` and `half` are equivalent because they have the same width.
As a consequence, miscompilation arises.

Change-Id: Ie5b7c4b385a946325c60de5495ce3bdf087abc46
---
 llvm/lib/IR/Instructions.cpp                  |  7 ++
 .../InstCombine/bitcast-bfloat-half-mixing.ll | 70 +++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/bitcast-bfloat-half-mixing.ll

diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 87874c3abc468..e268184b17f92 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -3214,6 +3214,13 @@ unsigned CastInst::isEliminableCastPair(
         return secondOp;
       return 0;
     case 6:
+      // In cast pairs bfloat and half float shouldn't be treated as equivalent
+      // if the first operation is a bitcast i.e. if we have
+      // bitcast bfloat to half + fpext half to double we shouldn't reduce to
+      // fpext bfloat to double as this isn't equal to fpext half to double.
+      // This has been generalised for all float pairs that have the same width.
+      if (SrcTy->getPrimitiveSizeInBits() == MidTy->getPrimitiveSizeInBits())
+        return 0;
       // No-op cast in first op implies secondOp as long as the SrcTy
       // is a floating point.
       if (SrcTy->isFloatingPointTy())
diff --git a/llvm/test/Transforms/InstCombine/bitcast-bfloat-half-mixing.ll b/llvm/test/Transforms/InstCombine/bitcast-bfloat-half-mixing.ll
new file mode 100644
index 0000000000000..3878f45c7326e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-bfloat-half-mixing.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
+
+define double @F0([2 x bfloat] %P0) {
+entry:
+  %P0.extract = extractvalue [2 x bfloat] %P0, 1
+  %conv0 = bitcast bfloat %P0.extract to half
+  %0 = fpext half %conv0 to double
+  ret double %0
+}
+
+; CHECK: fpext half %conv0 to double
+; CHECK-NOT: fpext bfloat %P0.extract to double
+
+define double @F1([2 x half] %P1) {
+entry:
+  %P1.extract = extractvalue [2 x half] %P1, 1
+  %conv1 = bitcast half %P1.extract to bfloat
+  %0 = fpext bfloat %conv1 to double
+  ret double %0
+}
+
+; CHECK: fpext bfloat %conv1 to double
+; CHECK-NOT: fpext half %P1.extract to double
+
+define i32 @F2([2 x bfloat] %P2) {
+entry:
+  %P2.extract = extractvalue [2 x bfloat] %P2, 1
+  %conv2 = bitcast bfloat %P2.extract to half
+  %0 = fptoui half %conv2 to i32
+  ret i32 %0
+}
+
+; CHECK: fptoui half %conv2 to i32
+; CHECK-NOT: fptoui bfloat %P2.extract to i32
+
+define i32 @F3([2 x half] %P3) {
+entry:
+  %P3.extract = extractvalue [2 x half] %P3, 1
+  %conv3 = bitcast half %P3.extract to bfloat
+  %0 = fptoui bfloat %conv3 to i32
+  ret i32 %0
+}
+
+; CHECK: fptoui bfloat %conv3 to i32
+; CHECK-NOT: fptoui half %P3.extract to i32
+
+
+define i32 @F4([2 x bfloat] %P4) {
+entry:
+  %P4.extract = extractvalue [2 x bfloat] %P4, 1
+  %conv4 = bitcast bfloat %P4.extract to half
+  %0 = fptosi half %conv4 to i32
+  ret i32 %0
+}
+
+; CHECK: fptosi half %conv4 to i32
+; CHECK-NOT: fptosi bfloat %P4.extract to i32
+
+define i32 @F5([2 x half] %P5) {
+entry:
+  %P5.extract = extractvalue [2 x half] %P5, 1
+  %conv5 = bitcast half %P5.extract to bfloat
+  %0 = fptosi bfloat %conv5 to i32
+  ret i32 %0
+}
+
+; CHECK: fptosi bfloat %conv5 to i32
+; CHECK-NOT: fptosi half %P5.extract to i32
+
+