[llvm-branch-commits] [llvm] release/18.x: Fix overflow flag for i128 USUBO (PR #86475)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Mar 25 01:24:42 PDT 2024
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/86475
Backport d9c31ee9568277e4303715736b40925e41503596
Requested by: @uweigand
>From ecfe3a9177f090595eb20f11eb81b781e16dd051 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand at de.ibm.com>
Date: Tue, 19 Mar 2024 11:14:57 +0100
Subject: [PATCH] Fix overflow flag for i128 USUBO
We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement
USUBO/USUBO_CARRY for the i128 data type. However, these instructions
use an inverted sense of the borrow indication flag (a value of 1
indicates *no* borrow, while a value of 0 indicates borrow). This
does not match the semantics of the boolean "overflow" flag of the
USUBO/USUBO_CARRY ISD nodes.
Fix this by generating code to explicitly invert the flag. The two
inversions cancel out if the result of a USUBO feeds into a USUBO_CARRY.
To avoid unnecessary zero-extend operations, also improve the
DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc)))
sequences where appropriate.
Fixes: https://github.com/llvm/llvm-project/issues/83268
(cherry picked from commit d9c31ee9568277e4303715736b40925e41503596)
---
.../Target/SystemZ/SystemZISelLowering.cpp | 34 +++++++++++++++++++
llvm/test/CodeGen/SystemZ/int-usub-12.ll | 22 ++++++++++++
llvm/test/CodeGen/SystemZ/int-usub-13.ll | 2 ++
3 files changed, 58 insertions(+)
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 5db04a8bef824a..0d22827a92f320 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4252,6 +4252,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
if (N->getValueType(0) == MVT::i128) {
unsigned BaseOp = 0;
unsigned FlagOp = 0;
+ bool IsBorrow = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO:
@@ -4261,6 +4262,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
case ISD::USUBO:
BaseOp = ISD::SUB;
FlagOp = SystemZISD::VSCBI;
+ IsBorrow = true;
break;
}
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
@@ -4268,6 +4270,9 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
+ if (IsBorrow)
+ Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
+ Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
}
@@ -4340,6 +4345,7 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
if (VT == MVT::i128) {
unsigned BaseOp = 0;
unsigned FlagOp = 0;
+ bool IsBorrow = false;
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
case ISD::UADDO_CARRY:
@@ -4349,14 +4355,21 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
case ISD::USUBO_CARRY:
BaseOp = SystemZISD::VSBI;
FlagOp = SystemZISD::VSBCBI;
+ IsBorrow = true;
break;
}
+ if (IsBorrow)
+ Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
+ Carry, DAG.getConstant(1, DL, Carry.getValueType()));
Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
DAG.getValueType(MVT::i1));
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
+ if (IsBorrow)
+ Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
+ Flag, DAG.getConstant(1, DL, Flag.getValueType()));
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
}
@@ -6584,6 +6597,27 @@ SDValue SystemZTargetLowering::combineTruncateExtract(
}
}
}
+ // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
+ // of the result is smaller than the size of X and all the truncated bits
+ // of X are already zero.
+ if (N0.getOpcode() == ISD::XOR &&
+ N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
+ KnownBits Known = DAG.computeKnownBits(X);
+ APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ VT.getSizeInBits());
+ if (TruncatedBits.isSubsetOf(Known.Zero)) {
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
+ APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
+ X, DAG.getConstant(Mask, SDLoc(N0), VT));
+ }
+ }
+ }
return SDValue();
}
diff --git a/llvm/test/CodeGen/SystemZ/int-usub-12.ll b/llvm/test/CodeGen/SystemZ/int-usub-12.ll
index c39a6da37048d3..147fbfd920a9dc 100644
--- a/llvm/test/CodeGen/SystemZ/int-usub-12.ll
+++ b/llvm/test/CodeGen/SystemZ/int-usub-12.ll
@@ -11,6 +11,7 @@ define zeroext i1 @f1(i128 %a, i128 %b, ptr %res) {
; CHECK-NEXT: vscbiq %v2, %v1, %v0
; CHECK-NEXT: vlgvg %r2, %v2, 1
; CHECK-NEXT: vsq %v0, %v1, %v0
+; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: vst %v0, 0(%r4), 3
; CHECK-NEXT: br %r14
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
@@ -27,6 +28,7 @@ define zeroext i1 @f2(i128 %a, i128 %b) {
; CHECK-NEXT: vl %v1, 0(%r2), 3
; CHECK-NEXT: vscbiq %v0, %v1, %v0
; CHECK-NEXT: vlgvg %r2, %v0, 1
+; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: br %r14
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
%obit = extractvalue {i128, i1} %t, 1
@@ -46,5 +48,25 @@ define i128 @f3(i128 %a, i128 %b) {
ret i128 %val
}
+define i128 @f4(i128 %a, i128 %b) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vscbiq %v2, %v1, %v0
+; CHECK-NEXT: vlgvf %r0, %v2, 3
+; CHECK-NEXT: vgbm %v2, 0
+; CHECK-NEXT: xilf %r0, 1
+; CHECK-NEXT: jl .LBB3_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsq %v2, %v1, %v0
+; CHECK-NEXT: .LBB3_2:
+; CHECK-NEXT: vst %v2, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %val = call i128 @llvm.usub.sat.i128(i128 %a, i128 %b)
+ ret i128 %val
+}
+
declare {i128, i1} @llvm.usub.with.overflow.i128(i128, i128) nounwind readnone
+declare i128 @llvm.usub.sat.i128(i128, i128) nounwind readnone
diff --git a/llvm/test/CodeGen/SystemZ/int-usub-13.ll b/llvm/test/CodeGen/SystemZ/int-usub-13.ll
index 637e1a81de996f..794af3b73fbe2a 100644
--- a/llvm/test/CodeGen/SystemZ/int-usub-13.ll
+++ b/llvm/test/CodeGen/SystemZ/int-usub-13.ll
@@ -15,6 +15,7 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) {
; CHECK-NEXT: vlgvg %r2, %v5, 1
; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4
; CHECK-NEXT: vsq %v1, %v3, %v2
+; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: vst %v1, 16(%r4), 3
; CHECK-NEXT: vst %v0, 0(%r4), 3
; CHECK-NEXT: br %r14
@@ -35,6 +36,7 @@ define zeroext i1 @f2(i256 %a, i256 %b) {
; CHECK-NEXT: vscbiq %v2, %v3, %v2
; CHECK-NEXT: vsbcbiq %v0, %v1, %v0, %v2
; CHECK-NEXT: vlgvg %r2, %v0, 1
+; CHECK-NEXT: xilf %r2, 1
; CHECK-NEXT: br %r14
%t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b)
%obit = extractvalue {i256, i1} %t, 1
More information about the llvm-branch-commits
mailing list