[flang-commits] [flang] 69b3303 - [flang] Lower numeric related instrinsics

Wed Mar 16 12:43:15 PDT 2022

Author: Valentin Clement
Date: 2022-03-16T20:43:07+01:00
New Revision: 69b3303e6ba0af7a805a8be798196de2ccad47ef

URL: https://github.com/llvm/llvm-project/commit/69b3303e6ba0af7a805a8be798196de2ccad47ef
DIFF: https://github.com/llvm/llvm-project/commit/69b3303e6ba0af7a805a8be798196de2ccad47ef.diff

LOG: [flang] Lower numeric related instrinsics

This patch adds lowering for some numeric related
intrinsics:
- `exponent`
- `floor`
- `fraction`
- `mod`
- `modulo`
- `nint`
- `not`
- `product`

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: PeteSteinfeld, schweitz

Differential Revision: https://reviews.llvm.org/D121828

Co-authored-by: Peter Steinfeld <psteinfeld at nvidia.com>
Co-authored-by: Jean Perier <jperier at nvidia.com>
Co-authored-by: mleair <leairmark at gmail.com>
Co-authored-by: Eric Schweitz <eschweitz at nvidia.com>

Added: 
    flang/test/Lower/Intrinsics/exponent.f90
    flang/test/Lower/Intrinsics/floor.f90
    flang/test/Lower/Intrinsics/fraction.f90
    flang/test/Lower/Intrinsics/modulo.f90
    flang/test/Lower/Intrinsics/nint.f90
    flang/test/Lower/Intrinsics/not.f90
    flang/test/Lower/Intrinsics/product.f90

Modified: 
    flang/lib/Lower/IntrinsicCall.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp
index 94b3432b1c633..a65245e0128d9 100644

--- a/flang/lib/Lower/IntrinsicCall.cpp
+++ b/flang/lib/Lower/IntrinsicCall.cpp
@@ -453,8 +453,12 @@ struct IntrinsicLibrary {
   fir::ExtendedValue genDotProduct(mlir::Type,
                                    llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genEoshift(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genExponent(mlir::Type, llvm::ArrayRef<mlir::Value>);
   template <Extremum, ExtremumBehavior>
   mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genFraction(mlir::Type resultType,
+                          mlir::ArrayRef<mlir::Value> args);
   /// Lowering for the IAND intrinsic. The IAND intrinsic expects two arguments
   /// in the llvm::ArrayRef.
   mlir::Value genIand(mlir::Type, llvm::ArrayRef<mlir::Value>);
@@ -466,13 +470,18 @@ struct IntrinsicLibrary {
   mlir::Value genIshft(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genIshftc(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genLbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
-  fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genLen(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genLenTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genMaxloc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genMaxval(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genMinloc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genMinval(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genMod(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genModulo(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genNint(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  mlir::Value genNot(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  fir::ExtendedValue genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genProduct(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   void genRandomInit(llvm::ArrayRef<fir::ExtendedValue>);
   void genRandomNumber(llvm::ArrayRef<fir::ExtendedValue>);
   void genRandomSeed(llvm::ArrayRef<fir::ExtendedValue>);
@@ -636,6 +645,9 @@ static constexpr IntrinsicHandler handlers[]{
        {"boundary", asBox, handleDynamicOptional},
        {"dim", asValue}}},
      /*isElemental=*/false},
+    {"exponent", &I::genExponent},
+    {"floor", &I::genFloor},
+    {"fraction", &I::genFraction},
     {"iachar", &I::genIchar},
     {"iand", &I::genIand},
     {"ibclr", &I::genIbclr},
@@ -684,7 +696,17 @@ static constexpr IntrinsicHandler handlers[]{
        {"dim", asValue},
        {"mask", asBox, handleDynamicOptional}}},
      /*isElemental=*/false},
+    {"mod", &I::genMod},
+    {"modulo", &I::genModulo},
+    {"nint", &I::genNint},
+    {"not", &I::genNot},
     {"null", &I::genNull, {{{"mold", asInquired}}}, /*isElemental=*/false},
+    {"product",
+     &I::genProduct,
+     {{{"array", asBox},
+       {"dim", asValue},
+       {"mask", asBox, handleDynamicOptional}}},
+     /*isElemental=*/false},
     {"random_init",
      &I::genRandomInit,
      {{{"repeatable", asValue}, {"image_distinct", asValue}}},
@@ -795,12 +817,33 @@ static mlir::FunctionType genF64F64F64FuncType(mlir::MLIRContext *context) {
   return mlir::FunctionType::get(context, {t, t}, {t});
 }
 
+template <int Bits>
+static mlir::FunctionType genIntF64FuncType(mlir::MLIRContext *context) {
+  auto t = mlir::FloatType::getF64(context);
+  auto r = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {t}, {r});
+}
+
+template <int Bits>
+static mlir::FunctionType genIntF32FuncType(mlir::MLIRContext *context) {
+  auto t = mlir::FloatType::getF32(context);
+  auto r = mlir::IntegerType::get(context, Bits);
+  return mlir::FunctionType::get(context, {t}, {r});
+}
+
 // TODO : Fill-up this table with more intrinsic.
 // Note: These are also defined as operations in LLVM dialect. See if this
 // can be use and has advantages.
 static constexpr RuntimeFunction llvmIntrinsics[] = {
     {"abs", "llvm.fabs.f32", genF32F32FuncType},
     {"abs", "llvm.fabs.f64", genF64F64FuncType},
+    // llvm.floor is used for FLOOR, but returns real.
+    {"floor", "llvm.floor.f32", genF32F32FuncType},
+    {"floor", "llvm.floor.f64", genF64F64FuncType},
+    {"nint", "llvm.lround.i64.f64", genIntF64FuncType<64>},
+    {"nint", "llvm.lround.i64.f32", genIntF32FuncType<64>},
+    {"nint", "llvm.lround.i32.f64", genIntF64FuncType<32>},
+    {"nint", "llvm.lround.i32.f32", genIntF32FuncType<32>},
     {"pow", "llvm.pow.f32", genF32F32F32FuncType},
     {"pow", "llvm.pow.f64", genF64F64F64FuncType},
 };
@@ -1890,6 +1933,38 @@ IntrinsicLibrary::genEoshift(mlir::Type resultType,
                            "unexpected result for EOSHIFT");
 }
 
+// EXPONENT
+mlir::Value IntrinsicLibrary::genExponent(mlir::Type resultType,
+                                          llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 1);
+
+  return builder.createConvert(
+      loc, resultType,
+      fir::runtime::genExponent(builder, loc, resultType,
+                                fir::getBase(args[0])));
+}
+
+// FLOOR
+mlir::Value IntrinsicLibrary::genFloor(mlir::Type resultType,
+                                       llvm::ArrayRef<mlir::Value> args) {
+  // Optional KIND argument.
+  assert(args.size() >= 1);
+  mlir::Value arg = args[0];
+  // Use LLVM floor that returns real.
+  mlir::Value floor = genRuntimeCall("floor", arg.getType(), {arg});
+  return builder.createConvert(loc, resultType, floor);
+}
+
+// FRACTION
+mlir::Value IntrinsicLibrary::genFraction(mlir::Type resultType,
+                                          llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 1);
+
+  return builder.createConvert(
+      loc, resultType,
+      fir::runtime::genFraction(builder, loc, fir::getBase(args[0])));
+}
+
 // IAND
 mlir::Value IntrinsicLibrary::genIand(mlir::Type resultType,
                                       llvm::ArrayRef<mlir::Value> args) {
@@ -2238,6 +2313,83 @@ mlir::Value IntrinsicLibrary::genExtremum(mlir::Type,
   return result;
 }
 
+// MOD
+mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
+                                     llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 2);
+  if (resultType.isa<mlir::IntegerType>())
+    return builder.create<mlir::arith::RemSIOp>(loc, args[0], args[1]);
+
+  // Use runtime. Note that mlir::arith::RemFOp implements floating point
+  // remainder, but it does not work with fir::Real type.
+  // TODO: consider using mlir::arith::RemFOp when possible, that may help
+  // folding and  optimizations.
+  return genRuntimeCall("mod", resultType, args);
+}
+
+// MODULO
+mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType,
+                                        llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 2);
+  // No floored modulo op in LLVM/MLIR yet. TODO: add one to MLIR.
+  // In the meantime, use a simple inlined implementation based on truncated
+  // modulo (MOD(A, P) implemented by RemIOp, RemFOp). This avoids making manual
+  // division and multiplication from MODULO formula.
+  //  - If A/P > 0 or MOD(A,P)=0, then INT(A/P) = FLOOR(A/P), and MODULO = MOD.
+  //  - Otherwise, when A/P < 0 and MOD(A,P) !=0, then MODULO(A, P) =
+  //    A-FLOOR(A/P)*P = A-(INT(A/P)-1)*P = A-INT(A/P)*P+P = MOD(A,P)+P
+  // Note that A/P < 0 if and only if A and P signs are 
diff erent.
+  if (resultType.isa<mlir::IntegerType>()) {
+    auto remainder =
+        builder.create<mlir::arith::RemSIOp>(loc, args[0], args[1]);
+    auto argXor = builder.create<mlir::arith::XOrIOp>(loc, args[0], args[1]);
+    mlir::Value zero = builder.createIntegerConstant(loc, argXor.getType(), 0);
+    auto argSignDifferent = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::slt, argXor, zero);
+    auto remainderIsNotZero = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::ne, remainder, zero);
+    auto mustAddP = builder.create<mlir::arith::AndIOp>(loc, remainderIsNotZero,
+                                                        argSignDifferent);
+    auto remPlusP =
+        builder.create<mlir::arith::AddIOp>(loc, remainder, args[1]);
+    return builder.create<mlir::arith::SelectOp>(loc, mustAddP, remPlusP,
+                                                 remainder);
+  }
+  // Real case
+  auto remainder = builder.create<mlir::arith::RemFOp>(loc, args[0], args[1]);
+  mlir::Value zero = builder.createRealZeroConstant(loc, remainder.getType());
+  auto remainderIsNotZero = builder.create<mlir::arith::CmpFOp>(
+      loc, mlir::arith::CmpFPredicate::UNE, remainder, zero);
+  auto aLessThanZero = builder.create<mlir::arith::CmpFOp>(
+      loc, mlir::arith::CmpFPredicate::OLT, args[0], zero);
+  auto pLessThanZero = builder.create<mlir::arith::CmpFOp>(
+      loc, mlir::arith::CmpFPredicate::OLT, args[1], zero);
+  auto argSignDifferent =
+      builder.create<mlir::arith::XOrIOp>(loc, aLessThanZero, pLessThanZero);
+  auto mustAddP = builder.create<mlir::arith::AndIOp>(loc, remainderIsNotZero,
+                                                      argSignDifferent);
+  auto remPlusP = builder.create<mlir::arith::AddFOp>(loc, remainder, args[1]);
+  return builder.create<mlir::arith::SelectOp>(loc, mustAddP, remPlusP,
+                                               remainder);
+}
+
+// NINT
+mlir::Value IntrinsicLibrary::genNint(mlir::Type resultType,
+                                      llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() >= 1);
+  // Skip optional kind argument to search the runtime; it is already reflected
+  // in result type.
+  return genRuntimeCall("nint", resultType, {args[0]});
+}
+
+// NOT
+mlir::Value IntrinsicLibrary::genNot(mlir::Type resultType,
+                                     llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 1);
+  mlir::Value allOnes = builder.createIntegerConstant(loc, resultType, -1);
+  return builder.create<mlir::arith::XOrIOp>(loc, args[0], allOnes);
+}
+
 // NULL
 fir::ExtendedValue
 IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
@@ -2255,6 +2407,15 @@ IntrinsicLibrary::genNull(mlir::Type, llvm::ArrayRef<fir::ExtendedValue> args) {
   return fir::MutableBoxValue(boxStorage, mold->nonDeferredLenParams(), {});
 }
 
+// PRODUCT
+fir::ExtendedValue
+IntrinsicLibrary::genProduct(mlir::Type resultType,
+                             llvm::ArrayRef<fir::ExtendedValue> args) {
+  return genProdOrSum(fir::runtime::genProduct, fir::runtime::genProductDim,
+                      resultType, builder, loc, stmtCtx,
+                      "unexpected result for Product", args);
+}
+
 // RANDOM_INIT
 void IntrinsicLibrary::genRandomInit(llvm::ArrayRef<fir::ExtendedValue> args) {
   assert(args.size() == 2);

diff  --git a/flang/test/Lower/Intrinsics/exponent.f90 b/flang/test/Lower/Intrinsics/exponent.f90
new file mode 100644
index 0000000000000..5d910b521d09f
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/exponent.f90
@@ -0,0 +1,41 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! EXPONENT
+! CHECK-LABEL: exponent_test
+subroutine exponent_test
+
+    integer :: i1, i2, i3, i4
+  ! CHECK: %[[i0:.*]] = fir.alloca i32 {bindc_name = "i1", uniq_name = "_QFexponent_testEi1"}
+  ! CHECK: %[[i1:.*]] = fir.alloca i32 {bindc_name = "i2", uniq_name = "_QFexponent_testEi2"}
+  ! CHECK: %[[i2:.*]] = fir.alloca i32 {bindc_name = "i3", uniq_name = "_QFexponent_testEi3"}
+  ! CHECK: %[[i3:.*]] = fir.alloca i32 {bindc_name = "i4", uniq_name = "_QFexponent_testEi4"}
+  
+    real(kind = 4) :: x1
+    real(kind = 8) :: x2
+    real(kind = 10) :: x3
+    real(kind = 16) :: x4
+  ! CHECK: %[[x0:.*]] = fir.alloca f32 {bindc_name = "x1", uniq_name = "_QFexponent_testEx1"}
+  ! CHECK: %[[x1:.*]] = fir.alloca f64 {bindc_name = "x2", uniq_name = "_QFexponent_testEx2"}
+  ! CHECK: %[[x2:.*]] = fir.alloca f80 {bindc_name = "x3", uniq_name = "_QFexponent_testEx3"}
+  ! CHECK: %[[x3:.*]] = fir.alloca f128 {bindc_name = "x4", uniq_name = "_QFexponent_testEx4"}
+  
+    i1 = exponent(x1)
+  ! CHECK: %[[temp0:.*]] = fir.load %[[x0:.*]] : !fir.ref<f32>
+  ! CHECK: %[[result0:.*]] = fir.call @_FortranAExponent4_4(%[[temp0:.*]]) : (f32) -> i32
+  ! CHECK: fir.store %[[result0:.*]] to %[[i0:.*]] : !fir.ref<i32>
+  
+    i2 = exponent(x2)
+  ! CHECK: %[[temp1:.*]] = fir.load %[[x1:.*]] : !fir.ref<f64>
+  ! CHECK: %[[result1:.*]] = fir.call @_FortranAExponent8_4(%[[temp1:.*]]) : (f64) -> i32
+  ! CHECK: fir.store %[[result1:.*]] to %[[i1:.*]] : !fir.ref<i32>
+  
+    i3 = exponent(x3)
+  ! CHECK: %[[temp2:.*]] = fir.load %[[x2:.*]] : !fir.ref<f80>
+  ! CHECK: %[[result2:.*]] = fir.call @_FortranAExponent10_4(%[[temp2:.*]]) : (f80) -> i32
+  ! CHECK: fir.store %[[result2:.*]] to %[[i2:.*]] : !fir.ref<i32>
+  
+    i4 = exponent(x4)
+  ! CHECK: %[[temp3:.*]] = fir.load %[[x3:.*]] : !fir.ref<f128>
+  ! CHECK: %[[result3:.*]] = fir.call @_FortranAExponent16_4(%[[temp3:.*]]) : (f128) -> i32
+  ! CHECK: fir.store %[[result3:.*]] to %[[i3:.*]] : !fir.ref<i32>
+  end subroutine exponent_test

diff  --git a/flang/test/Lower/Intrinsics/floor.f90 b/flang/test/Lower/Intrinsics/floor.f90
new file mode 100644
index 0000000000000..a793b04048a27
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/floor.f90
@@ -0,0 +1,19 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: floor_test1
+subroutine floor_test1(i, a)
+    integer :: i
+    real :: a
+    i = floor(a)
+    ! CHECK: %[[f:.*]] = fir.call @llvm.floor.f32
+    ! CHECK: fir.convert %[[f]] : (f32) -> i32
+  end subroutine
+  ! CHECK-LABEL: floor_test2
+  subroutine floor_test2(i, a)
+    integer(8) :: i
+    real :: a
+    i = floor(a, 8)
+    ! CHECK: %[[f:.*]] = fir.call @llvm.floor.f32
+    ! CHECK: fir.convert %[[f]] : (f32) -> i64
+  end subroutine
+  
\ No newline at end of file

diff  --git a/flang/test/Lower/Intrinsics/fraction.f90 b/flang/test/Lower/Intrinsics/fraction.f90
new file mode 100644
index 0000000000000..98267862fe3ec
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/fraction.f90
@@ -0,0 +1,35 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+ 
+! FRACTION
+! CHECK-LABE: fraction_test
+subroutine fraction_test
+
+    real(kind=4) :: x1 = 178.1387e-4
+    real(kind=8) :: x2 = 178.1387e-4
+    real(kind=10) :: x3 = 178.1387e-4
+    real(kind=16) :: x4 = 178.1387e-4
+  ! CHECK: %[[r0:.*]] = fir.address_of(@_QFfraction_testEx1) : !fir.ref<f32>
+  ! CHECK: %[[r1:.*]] = fir.address_of(@_QFfraction_testEx2) : !fir.ref<f64>
+  ! CHECK: %[[r2:.*]] = fir.address_of(@_QFfraction_testEx3) : !fir.ref<f80>
+  ! CHECK: %[[r3:.*]] = fir.address_of(@_QFfraction_testEx4) : !fir.ref<f128>
+  
+    x1 = fraction(x1)
+  ! CHECK: %[[temp0:.*]] = fir.load %[[r0:.*]] : !fir.ref<f32>
+  ! CHECK: %[[result0:.*]] = fir.call @_FortranAFraction4(%[[temp0:.*]]) : (f32) -> f32
+  ! CHECK: fir.store %[[result0:.*]] to %[[r0:.*]] : !fir.ref<f32>
+  
+    x2 = fraction(x2)
+  ! CHECK: %[[temp1:.*]] = fir.load %[[r1:.*]] : !fir.ref<f64>
+  ! CHECK: %[[result1:.*]] = fir.call @_FortranAFraction8(%[[temp1:.*]]) : (f64) -> f64
+  ! CHECK: fir.store %[[result1:.*]] to %[[r1:.*]] : !fir.ref<f64>
+  
+    x3 = fraction(x3)
+  ! CHECK: %[[temp2:.*]] = fir.load %[[r2:.*]] : !fir.ref<f80>
+  ! CHECK: %[[result2:.*]] = fir.call @_FortranAFraction10(%[[temp2:.*]]) : (f80) -> f80
+  ! CHECK: fir.store %[[result2:.*]] to %[[r2:.*]] : !fir.ref<f80>
+  
+    x4 = fraction(x4)
+  ! CHECK: %[[temp3:.*]] = fir.load %[[r3:.*]] : !fir.ref<f128>
+  ! CHECK: %[[result3:.*]] = fir.call @_FortranAFraction16(%[[temp3:.*]]) : (f128) -> f128
+  ! CHECK: fir.store %[[result3:.*]] to %[[r3:.*]] : !fir.ref<f128>
+  end subroutine fraction_test

diff  --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90
new file mode 100644
index 0000000000000..27af7f586d3bb
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/modulo.f90
@@ -0,0 +1,38 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPmodulo_testr(
+! CHECK-SAME: %[[arg0:.*]]: !fir.ref<f64>{{.*}}, %[[arg1:.*]]: !fir.ref<f64>{{.*}}, %[[arg2:.*]]: !fir.ref<f64>{{.*}}) {
+subroutine modulo_testr(r, a, p)
+    real(8) :: r, a, p
+    ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<f64>
+    ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<f64>
+    ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] : f64
+    ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64
+    ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] : f64
+    ! CHECK-DAG: %[[aNeg:.*]] = arith.cmpf olt, %[[a]], %[[zero]] : f64
+    ! CHECK-DAG: %[[pNeg:.*]] = arith.cmpf olt, %[[p]], %[[zero]] : f64
+    ! CHECK-DAG: %[[signDifferent:.*]] = arith.xori %[[aNeg]], %[[pNeg]] : i1
+    ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
+    ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] : f64
+    ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64
+    ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<f64>
+    r = modulo(a, p)
+  end subroutine
+  
+  ! CHECK-LABEL: func @_QPmodulo_testi(
+  ! CHECK-SAME: %[[arg0:.*]]: !fir.ref<i64>{{.*}}, %[[arg1:.*]]: !fir.ref<i64>{{.*}}, %[[arg2:.*]]: !fir.ref<i64>{{.*}}) {
+  subroutine modulo_testi(r, a, p)
+    integer(8) :: r, a, p
+    ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref<i64>
+    ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref<i64>
+    ! CHECK-DAG: %[[rem:.*]] = arith.remsi %[[a]], %[[p]] : i64
+    ! CHECK-DAG: %[[argXor:.*]] = arith.xori %[[a]], %[[p]] : i64
+    ! CHECK-DAG: %[[signDifferent:.*]] = arith.cmpi slt, %[[argXor]], %c0{{.*}} : i64
+    ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpi ne, %[[rem]], %c0{{.*}} : i64
+    ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1
+    ! CHECK-DAG: %[[remPlusP:.*]] = arith.addi %[[rem]], %[[p]] : i64
+    ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : i64
+    ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref<i64>
+    r = modulo(a, p)
+  end subroutine
+  
\ No newline at end of file

diff  --git a/flang/test/Lower/Intrinsics/nint.f90 b/flang/test/Lower/Intrinsics/nint.f90
new file mode 100644
index 0000000000000..2f25eda5d20b2
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/nint.f90
@@ -0,0 +1,17 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: nint_test1
+subroutine nint_test1(i, a)
+    integer :: i
+    real :: a
+    i = nint(a)
+    ! CHECK: fir.call @llvm.lround.i32.f32
+  end subroutine
+  ! CHECK-LABEL: nint_test2
+  subroutine nint_test2(i, a)
+    integer(8) :: i
+    real(8) :: a
+    i = nint(a, 8)
+    ! CHECK: fir.call @llvm.lround.i64.f64
+  end subroutine
+  
\ No newline at end of file

diff  --git a/flang/test/Lower/Intrinsics/not.f90 b/flang/test/Lower/Intrinsics/not.f90
new file mode 100644
index 0000000000000..e900e211261b1
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/not.f90
@@ -0,0 +1,17 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: not_test
+subroutine not_test
+    integer :: source
+    integer :: destination
+    ! CHECK_LABEL: not_test
+    ! CHECK: %[[dest:.*]] = fir.alloca i32 {bindc_name = "destination", uniq_name = "_QFnot_testEdestination"}
+    ! CHECK: %[[source:.*]] = fir.alloca i32 {bindc_name = "source", uniq_name = "_QFnot_testEsource"}
+    ! CHECK: %[[loaded_source:.*]] = fir.load %[[source]] : !fir.ref<i32>
+    ! CHECK: %[[all_ones:.*]] = arith.constant -1 : i32
+    ! CHECK: %[[result:.*]] = arith.xori %[[loaded_source]], %[[all_ones]] : i32
+    ! CHECK: fir.store %[[result]] to %[[dest]] : !fir.ref<i32>
+    ! CHECK: return
+    destination = not(source)
+  end subroutine
+  
\ No newline at end of file

diff  --git a/flang/test/Lower/Intrinsics/product.f90 b/flang/test/Lower/Intrinsics/product.f90
new file mode 100644
index 0000000000000..3042e126b2402
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/product.f90
@@ -0,0 +1,134 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPproduct_test(
+! CHECK-SAME: %[[arg0:.*]]: !fir.box<!fir.array<?xi32>>{{.*}}) -> i32
+integer function product_test(a)
+integer :: a(:)
+! CHECK-DAG:  %[[c0:.*]] = arith.constant 0 : index
+! CHECK-DAG:  %[[a1:.*]] = fir.absent !fir.box<i1>
+! CHECK-DAG: %[[a3:.*]] = fir.convert %[[arg0]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+! CHECK-DAG:  %[[a5:.*]] = fir.convert %[[c0]] : (index) -> i32
+! CHECK-DAG:  %[[a6:.*]] = fir.convert %[[a1]] : (!fir.box<i1>) -> !fir.box<none>
+product_test = product(a)
+! CHECK:  %{{.*}} = fir.call @_FortranAProductInteger4(%[[a3]], %{{.*}}, %{{.*}}, %[[a5]], %[[a6]]) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+end function
+
+! CHECK-LABEL: func @_QPproduct_test2(
+! CHECK-SAME: %[[arg0:.*]]: !fir.box<!fir.array<?x?xi32>>{{.*}}, %[[arg1:.*]]: !fir.box<!fir.array<?xi32>>
+subroutine product_test2(a,r)
+integer :: a(:,:)
+integer :: r(:)
+! CHECK-DAG:  %[[c2_i32:.*]] = arith.constant 2 : i32
+! CHECK-DAG:  %[[a0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK-DAG:  %[[a1:.*]] = fir.absent !fir.box<i1>
+! CHECK-DAG:  %[[a6:.*]] = fir.convert %[[a0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK-DAG:  %[[a7:.*]] = fir.convert %[[arg0]] : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+! CHECK-DAG:  %[[a9:.*]] = fir.convert %[[a1]] : (!fir.box<i1>) -> !fir.box<none>
+r = product(a,dim=2)
+! CHECK:  %{{.*}} = fir.call @_FortranAProductDim(%[[a6]], %[[a7]], %[[c2_i32]], %{{.*}}, %{{.*}}, %[[a9]]) : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>) -> none
+! CHECK-DAG: %[[a11:.*]] = fir.load %[[a0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-DAG:  %[[a13:.*]] = fir.box_addr %[[a11]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK-DAG:  fir.freemem %[[a13]]
+end subroutine
+
+! CHECK-LABEL: func @_QPproduct_test3(
+! CHECK-SAME: %[[arg0:.*]]: !fir.box<!fir.array<?x!fir.complex<4>>>{{.*}}) -> !fir.complex<4>
+complex function product_test3(a)
+complex :: a(:)
+! CHECK-DAG:  %[[c0:.*]] = arith.constant 0 : index
+! CHECK-DAG:  %[[a0:.*]] = fir.alloca !fir.complex<4>
+! CHECK-DAG:  %[[a3:.*]] = fir.absent !fir.box<i1>
+! CHECK-DAG: %[[a5:.*]] = fir.convert %[[a0]] : (!fir.ref<!fir.complex<4>>) -> !fir.ref<complex<f32>>
+! CHECK-DAG:  %[[a6:.*]] = fir.convert %[[arg0]] : (!fir.box<!fir.array<?x!fir.complex<4>>>) -> !fir.box<none>
+! CHECK-DAG:  %[[a8:.*]] = fir.convert %[[c0]] : (index) -> i32
+! CHECK-DAG:  %[[a9:.*]] = fir.convert %[[a3]] : (!fir.box<i1>) -> !fir.box<none>
+product_test3 = product(a)
+! CHECK:  %{{.*}} = fir.call @_FortranACppProductComplex4(%[[a5]], %[[a6]], %{{.*}}, %{{.*}}, %[[a8]], %[[a9]]) : (!fir.ref<complex<f32>>, !fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> none
+end function
+
+! CHECK-LABEL: func @_QPproduct_test4(
+! CHECK-SAME: %[[arg0:.*]]: !fir.box<!fir.array<?x!fir.complex<10>>>{{.*}}) -> !fir.complex<10>
+complex(10) function product_test4(x)
+complex(10):: x(:)
+! CHECK-DAG:  %[[c0:.*]] = arith.constant 0 : index
+! CHECK-DAG:  %[[a0:.*]] = fir.alloca !fir.complex<10>
+product_test4 = product(x)
+! CHECK-DAG: %[[a2:.*]] = fir.absent !fir.box<i1>
+! CHECK-DAG: %[[a4:.*]] = fir.convert %[[a0]] : (!fir.ref<!fir.complex<10>>) -> !fir.ref<complex<f80>>
+! CHECK-DAG: %[[a5:.*]] = fir.convert %[[arg0]] : (!fir.box<!fir.array<?x!fir.complex<10>>>) -> !fir.box<none>
+! CHECK-DAG:  %[[a7:.*]] = fir.convert %[[c0]] : (index) -> i32
+! CHECK-DAG:  %[[a8:.*]] = fir.convert %[[a2]] : (!fir.box<i1>) -> !fir.box<none>
+! CHECK: fir.call @_FortranACppProductComplex10(%[[a4]], %[[a5]], %{{.*}}, %{{.*}}, %[[a7]], %8) : (!fir.ref<complex<f80>>, !fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> ()
+end
+
+! CHECK-LABEL: func @_QPproduct_test_optional(
+! CHECK-SAME:  %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>
+real function product_test_optional(mask, x)
+real :: x(:)
+logical, optional :: mask(:)
+product_test_optional = product(x, mask=mask)
+! CHECK:  %[[VAL_9:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+! CHECK:  fir.call @_FortranAProductReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_9]]) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f32
+end function
+
+! CHECK-LABEL: func @_QPproduct_test_optional_2(
+! CHECK-SAME:  %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+real function product_test_optional_2(mask, x)
+real :: x(:)
+logical, pointer :: mask(:)
+product_test_optional = product(x, mask=mask)
+! CHECK:  %[[VAL_4:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+! CHECK:  %[[VAL_5:.*]] = fir.box_addr %[[VAL_4]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>) -> !fir.ptr<!fir.array<?x!fir.logical<4>>>
+! CHECK:  %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (!fir.ptr<!fir.array<?x!fir.logical<4>>>) -> i64
+! CHECK:  %[[VAL_7:.*]] = arith.constant 0 : i64
+! CHECK:  %[[VAL_8:.*]] = arith.cmpi ne, %[[VAL_6]], %[[VAL_7]] : i64
+! CHECK:  %[[VAL_9:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+! CHECK:  %[[VAL_10:.*]] = fir.absent !fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>
+! CHECK:  %[[VAL_11:.*]] = arith.select %[[VAL_8]], %[[VAL_9]], %[[VAL_10]] : !fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>
+! CHECK:  %[[VAL_18:.*]] = fir.convert %[[VAL_11]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>) -> !fir.box<none>
+! CHECK:  fir.call @_FortranAProductReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_18]]) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f32
+end function
+
+! CHECK-LABEL: func @_QPproduct_test_optional_3(
+! CHECK-SAME:  %[[VAL_0:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>>
+real function product_test_optional_3(mask, x)
+real :: x(:)
+logical, optional :: mask(10)
+product_test_optional = product(x, mask=mask)
+! CHECK:  %[[VAL_2:.*]] = arith.constant 10 : index
+! CHECK:  %[[VAL_5:.*]] = fir.is_present %[[VAL_0]] : (!fir.ref<!fir.array<10x!fir.logical<4>>>) -> i1
+! CHECK:  %[[VAL_6:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+! CHECK:  %[[VAL_7:.*]] = fir.embox %[[VAL_0]](%[[VAL_6]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+! CHECK:  %[[VAL_8:.*]] = fir.absent !fir.box<!fir.array<10x!fir.logical<4>>>
+! CHECK:  %[[VAL_9:.*]] = arith.select %[[VAL_5]], %[[VAL_7]], %[[VAL_8]] : !fir.box<!fir.array<10x!fir.logical<4>>>
+! CHECK:  %[[VAL_18:.*]] = fir.convert %[[VAL_9]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+! CHECK:  fir.call @_FortranAProductReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_18]]) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f32
+end function
+
+! CHECK-LABEL: func @_QPproduct_test_optional_4(
+real function product_test_optional_4(x, use_mask)
+! Test that local allocatable tracked in local variables
+! are dealt as optional argument correctly.
+real :: x(:)
+logical :: use_mask
+logical, allocatable :: mask(:)
+if (use_mask) then 
+  allocate(mask(size(x, 1)))
+  call set_mask(mask)
+  ! CHECK: fir.call @_QPset_mask
+end if
+product_test_optional = product(x, mask=mask)
+! CHECK:  %[[VAL_20:.*]] = fir.load %[[VAL_3:.*]] : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+! CHECK:  %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (!fir.heap<!fir.array<?x!fir.logical<4>>>) -> i64
+! CHECK:  %[[VAL_22:.*]] = arith.constant 0 : i64
+! CHECK:  %[[VAL_23:.*]] = arith.cmpi ne, %[[VAL_21]], %[[VAL_22]] : i64
+! CHECK:  %[[VAL_24:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref<index>
+! CHECK:  %[[VAL_25:.*]] = fir.load %[[VAL_5:.*]] : !fir.ref<index>
+! CHECK:  %[[VAL_26:.*]] = fir.load %[[VAL_3]] : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+! CHECK:  %[[VAL_27:.*]] = fir.shape_shift %[[VAL_24]], %[[VAL_25]] : (index, index) -> !fir.shapeshift<1>
+! CHECK:  %[[VAL_28:.*]] = fir.embox %[[VAL_26]](%[[VAL_27]]) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
+! CHECK:  %[[VAL_29:.*]] = fir.absent !fir.box<!fir.array<?x!fir.logical<4>>>
+! CHECK:  %[[VAL_30:.*]] = arith.select %[[VAL_23]], %[[VAL_28]], %[[VAL_29]] : !fir.box<!fir.array<?x!fir.logical<4>>>
+! CHECK:  %[[VAL_37:.*]] = fir.convert %[[VAL_30]] : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+! CHECK:  fir.call @_FortranAProductReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[VAL_37]]) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f32
+end function