[PATCH] D102528: [NVPTX] Fix lowering of frem for negative values
Thomas Raoux via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri May 14 13:18:05 PDT 2021
ThomasRaoux created this revision.
Herald added subscribers: hiraditya, jholewinski.
Herald added a reviewer: mravishankar.
ThomasRaoux requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
to match fmod frem result must have the dividend sign. Previous implementation had the wrong sign when passing negative numbers. For ex: frem(-16, 7) was returning 5 instead of -2. We should just a ftrunc instead of floor when lowering to get the right behavior.
https://reviews.llvm.org/D102528
Files:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
Index: llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -236,12 +236,12 @@
; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
; -- frem(a[0],b[0]).
; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
+; CHECK-DAG: cvt.rzi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
; -- frem(a[1],b[1]).
; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
+; CHECK-DAG: cvt.rzi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
; -- convert back to f16.
Index: llvm/test/CodeGen/NVPTX/f16-instructions.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -164,13 +164,13 @@
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
-; CHECK-NOFTZ-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
+; CHECK-NOFTZ-NEXT: cvt.rzi.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-NOFTZ-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-NOFTZ-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FA:%f[0-9]+]], [[A]];
; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FB:%f[0-9]+]], [[B]];
; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
-; CHECK-F16-FTZ-NEXT: cvt.rmi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
+; CHECK-F16-FTZ-NEXT: cvt.rzi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
; CHECK-F16-FTZ-NEXT: mul.ftz.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1039,39 +1039,39 @@
[(set Float32Regs:$dst, (fcos Float32Regs:$src))]>,
Requires<[allowUnsafeFPMath]>;
-// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)),
+// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()"
// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
(FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
- (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ),
+ (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
Float32Regs:$y))>,
Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
(FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
- (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ),
+ (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
fpimm:$y))>,
Requires<[doF32FTZ]>;
// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
(FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
- (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI),
+ (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
(FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
- (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI),
+ (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
fpimm:$y))>;
// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
(FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
- (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI),
+ (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
Float64Regs:$y))>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
(FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
- (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI),
+ (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
fpimm:$y))>;
//-----------------------------------
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102528.345539.patch
Type: text/x-patch
Size: 4605 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210514/d2a2f8cc/attachment.bin>
More information about the llvm-commits
mailing list