[flang-commits] [flang] [llvm] [RISCV] Add DAG combine to turn (sub (shl X, 8-Y), (shr X, Y)) into orc.b (PR #111828)

Fri Oct 11 05:18:40 PDT 2024

https://github.com/damokeev updated https://github.com/llvm/llvm-project/pull/111828

>From 6b1807d9d2efb8446b00547aed66a1bedb3fb7d0 Mon Sep 17 00:00:00 2001
From: Daniel Mokeev <mokeev.gh at gmail.com>
Date: Wed, 9 Oct 2024 18:14:50 +0200
Subject: [PATCH 1/2] [RISCV] Add DAG combine to turn (sub (shl X, 8-Y), (shr
 X, Y)) into orc.b

This patch generalizes the DAG combine for (sub (shl X, 8), X) => (orc.b X)
into the more general form of (sub (shl X, 8 - Y), (srl X, Y)) => (orc.b X).

Alive2 generalized proof: https://alive2.llvm.org/ce/z/dFcf_n
Related issue: https://github.com/llvm/llvm-project/issues/96595
Related PR: https://github.com/llvm/llvm-project/pull/96680
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  44 ++-
 llvm/test/CodeGen/RISCV/orc-b-patterns.ll   | 372 ++++++++++++++++++++
 2 files changed, 408 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/orc-b-patterns.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 230ccd8209e1f2..fb7399ca13788b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13569,8 +13569,10 @@ static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
 }
 
-// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
-// non-zero. Replace with orc.b.
+// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
+// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
+// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
+// valid with Y=3, while 0b0000_1000_0000_0100 is not.
 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
   if (!Subtarget.hasStdExtZbb())
@@ -13584,18 +13586,44 @@ static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
+  if (N0->getOpcode() != ISD::SHL)
     return SDValue();
 
-  auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-  if (!ShAmtC || ShAmtC->getZExtValue() != 8)
+  auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!ShAmtCLeft)
     return SDValue();
+  unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
 
-  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
-  if (!DAG.MaskedValueIsZero(N1, Mask))
+  if (ShiftedAmount >= 8)
     return SDValue();
 
-  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
+  SDValue LeftShiftOperand = N0->getOperand(0);
+  SDValue RightShiftOperand = N1;
+
+  if (ShiftedAmount != 0) { // Right operand must be a right shift.
+    if (N1->getOpcode() != ISD::SRL)
+      return SDValue();
+    auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
+      return SDValue();
+    RightShiftOperand = N1.getOperand(0);
+  }
+
+  // At least one shift should have a single use.
+  if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
+    return SDValue();
+
+  if (LeftShiftOperand != RightShiftOperand)
+    return SDValue();
+
+  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
+  Mask <<= ShiftedAmount;
+  // Check that X has indeed the right shape (only the Y-th bit can be set in
+  // every byte).
+  if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
+    return SDValue();
+
+  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
 }
 
 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/test/CodeGen/RISCV/orc-b-patterns.ll b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll
new file mode 100644
index 00000000000000..184e66c14b33fc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll
@@ -0,0 +1,372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32ZBB
+
+define i32 @orc_b_i32_mul255(i32 %x) nounwind {
+; RV32I-LABEL: orc_b_i32_mul255:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_mul255:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 4112
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %mul = mul nuw nsw i32 %and, 255
+  ret i32 %mul
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_lsb(i32  %x)  {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_lsb:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_lsb:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 4112
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_lsb_preshifted(i32 %x){
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_lsb_preshifted:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a0, a0, 11
+; RV32I-NEXT:    lui a1, 16
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_lsb_preshifted:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    srli a0, a0, 11
+; RV32ZBB-NEXT:    lui a1, 16
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %shr = lshr i32 %x, 11
+  %and = and i32 %shr, 16843009
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+
+define  i32 @orc_b_i32_sub_shl8x_x_b1(i32  %x)  {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 8224
+; RV32I-NEXT:    addi a1, a1, 514
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 8224
+; RV32ZBB-NEXT:    addi a1, a1, 514
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define  i32 @orc_b_i32_sub_shl8x_x_b2(i32  %x)  {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 16448
+; RV32I-NEXT:    addi a1, a1, 1028
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 6
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b2:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 16448
+; RV32ZBB-NEXT:    addi a1, a1, 1028
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 67372036
+  %shl = shl i32 %and, 6
+  %shr = lshr exact i32 %and, 2
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b3(i32  %x)  {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 24672
+; CHECK-NEXT:    addi a1, a1, 1542
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 5
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 101058054
+  %shl = shl nuw i32 %and, 5
+  %shr = lshr i32 %and, 3
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define  i32 @orc_b_i32_sub_shl8x_x_b4(i32  %x)  {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 32897
+; CHECK-NEXT:    addi a1, a1, -2040
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    srli a0, a0, 4
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 134744072
+  %shl = shl nuw i32 %and, 4
+  %shr = lshr i32 %and, 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define  i32 @orc_b_i32_sub_shl8x_x_b5(i32  %x)  {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 65793
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 5
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 269488144
+  %shl = shl nuw i32 %and, 3
+  %shr = lshr i32 %and, 5
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b6(i32 %x)  {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 131586
+; CHECK-NEXT:    addi a1, a1, 32
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 2
+; CHECK-NEXT:    srli a0, a0, 6
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 538976288
+  %shl = shl nuw i32 %and, 2
+  %shr = lshr i32 %and, 6
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b7(i32 %x)  {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 263172
+; CHECK-NEXT:    addi a1, a1, 64
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    srli a0, a0, 7
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 1077952576
+  %shl = shl nuw i32 %and, 1
+  %shr = lshr i32 %and, 7
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_shl_used(i32 %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_shl_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 8224
+; RV32I-NEXT:    addi a2, a2, 514
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 7
+; RV32I-NEXT:    srli a3, a0, 1
+; RV32I-NEXT:    sub a0, a2, a3
+; RV32I-NEXT:    sw a3, 0(a1)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_shl_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a2, 8224
+; RV32ZBB-NEXT:    addi a2, a2, 514
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shr, ptr %arr, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_srl_used(i32  %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_srl_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 8224
+; RV32I-NEXT:    addi a2, a2, 514
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a2, a0
+; RV32I-NEXT:    sw a2, 0(a1)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_srl_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a2, 8224
+; RV32ZBB-NEXT:    addi a2, a2, 514
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a2, a0, 7
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shl, ptr %arr, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_not_used(i32  %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_not_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 8224
+; RV32I-NEXT:    addi a1, a1, 514
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_not_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 8224
+; RV32ZBB-NEXT:    addi a1, a1, 514
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_shl_used(i32  %x, ptr %arr){
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_shl_used:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a2, 4112
+; CHECK-NEXT:    addi a2, a2, 257
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 8
+; CHECK-NEXT:    sub a0, a2, a0
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %shl = shl i32 %and, 8
+  store i32 %shl, ptr %arr, align 4
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_both_used(i32  %x, ptr %arr) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b1_both_used:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a2, 8224
+; CHECK-NEXT:    addi a2, a2, 514
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 7
+; CHECK-NEXT:    srli a3, a0, 1
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    sub a0, a2, a3
+; CHECK-NEXT:    sw a3, 4(a1)
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shl, ptr %arr, align 4
+  %arrayidx1 = getelementptr inbounds i8, ptr %arr, i32 4
+  store i32 %shr, ptr %arrayidx1, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_x_shr8x(i32 %x)  {
+; CHECK-LABEL: orc_b_i32_sub_x_shr8x:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 4112
+; CHECK-NEXT:    addi a1, a1, 257
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    srli a1, a0, 8
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %shr = lshr i32 %and, 8
+  %sub = sub nsw i32 %and, %shr
+  ret i32 %sub
+}

>From 95030b0a825dae744cf42045dff4c398d0f68906 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 11 Oct 2024 11:53:28 +0200
Subject: [PATCH 2/2] [OpenMP][Flang] Enable alias analysis inside omp target
 region (#111670)

At present, alias analysis does not work for operations inside OMP
target regions because the FIR declare operations within OMP target do
not offer sufficient information for alias analysis. Consequently, it is
necessary to examine the FIR code outside the OMP target region.
---
 .../lib/Optimizer/Analysis/AliasAnalysis.cpp  | 29 ++++++
 flang/lib/Optimizer/Analysis/CMakeLists.txt   |  2 +
 .../alias-analysis-omp-target-1.fir           | 66 +++++++++++++
 .../alias-analysis-omp-target-2.fir           | 96 +++++++++++++++++++
 4 files changed, 193 insertions(+)
 create mode 100644 flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir
 create mode 100644 flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir

diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index e88da5a8ebae19..6ee4f0ff71057a 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -13,6 +13,8 @@
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "mlir/Analysis/AliasAnalysis.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Value.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -296,6 +298,17 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
             defOp = v.getDefiningOp();
             return;
           }
+          // If load is inside target and it points to mapped item,
+          // continue tracking.
+          Operation *loadMemrefOp = op.getMemref().getDefiningOp();
+          bool isDeclareOp = llvm::isa<fir::DeclareOp>(loadMemrefOp) ||
+                             llvm::isa<hlfir::DeclareOp>(loadMemrefOp);
+          if (isDeclareOp &&
+              llvm::isa<omp::TargetOp>(loadMemrefOp->getParentOp())) {
+            v = op.getMemref();
+            defOp = v.getDefiningOp();
+            return;
+          }
           // No further tracking for addresses loaded from memory for now.
           type = SourceKind::Indirect;
           breakFromLoop = true;
@@ -319,6 +332,22 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
           breakFromLoop = true;
         })
         .Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
+          // If declare operation is inside omp target region,
+          // continue alias analysis outside the target region
+          if (auto targetOp =
+                  llvm::dyn_cast<omp::TargetOp>(op->getParentOp())) {
+            auto argIface = cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
+            for (auto [opArg, blockArg] : llvm::zip_equal(
+                     targetOp.getMapVars(), argIface.getMapBlockArgs())) {
+              if (blockArg == op.getMemref()) {
+                omp::MapInfoOp mapInfo =
+                    llvm::cast<omp::MapInfoOp>(opArg.getDefiningOp());
+                v = mapInfo.getVarPtr();
+                defOp = v.getDefiningOp();
+                return;
+              }
+            }
+          }
           auto varIf = llvm::cast<fir::FortranVariableOpInterface>(defOp);
           // While going through a declare operation collect
           // the variable attributes from it. Right now, some
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 436d4d3f18969c..c000a9da99f871 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(FIRAnalysis
   FIRDialect
   HLFIRDialect
   MLIRIR
+  MLIROpenMPDialect
 
   LINK_LIBS
   FIRBuilder
@@ -14,5 +15,6 @@ add_flang_library(FIRAnalysis
   MLIRFuncDialect
   MLIRLLVMDialect
   MLIRMathTransforms
+  MLIROpenMPDialect
   FIRSupport
 )
diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir
new file mode 100644
index 00000000000000..88f411847172a0
--- /dev/null
+++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir
@@ -0,0 +1,66 @@
+// Use --mlir-disable-threading so that the AA queries are serialized
+// as well as its diagnostic output.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+// Fortran source code:
+//
+// program TestAllocatableArray
+// real(kind=8),  allocatable :: A(:)
+// real(kind=8),  allocatable :: B(:)
+// !$omp target
+//    A(0) = B(0)
+// !$omp end target
+// end TestAllocatableArray
+
+// CHECK-LABEL: Testing : "_QPTestAllocatableArray"
+// CHECK-DAG: targetArrayB#0 <-> targetArrayA#0: NoAlias
+func.func @_QPTestAllocatableArray() {
+  %0 = fir.address_of(@_QFEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "ArrayA" } : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %2 = fir.address_of(@_QFEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %3:2 = hlfir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "ArrayB" } : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+  %4 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %5 = fir.load %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %c0_0 = arith.constant 0 : index
+  %6:3 = fir.box_dims %5, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %7:3 = fir.box_dims %4, %c0 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %c0_1 = arith.constant 0 : index
+  %8 = arith.subi %7#1, %c1 : index
+  %9 = omp.map.bounds lower_bound(%c0_1 : index) upper_bound(%8 : index) extent(%7#1 : index) stride(%7#2 : index) start_idx(%6#0 : index) {stride_in_bytes = true}
+  %10 = fir.box_offset %1#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>
+  %11 = omp.map.info var_ptr(%1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.array<?xf64>) var_ptr_ptr(%10 : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%9) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>> {name = ""}
+  %12 = omp.map.info var_ptr(%1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.heap<!fir.array<?xf64>>>) map_clauses(implicit, tofrom) capture(ByRef) members(%11 : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>> {name = "a"}
+  %13 = fir.load %3#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %c1_2 = arith.constant 1 : index
+  %c0_3 = arith.constant 0 : index
+  %14 = fir.load %3#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+  %c0_4 = arith.constant 0 : index
+  %15:3 = fir.box_dims %14, %c0_4 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %16:3 = fir.box_dims %13, %c0_3 : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> (index, index, index)
+  %c0_5 = arith.constant 0 : index
+  %17 = arith.subi %16#1, %c1_2 : index
+  %18 = omp.map.bounds lower_bound(%c0_5 : index) upper_bound(%17 : index) extent(%16#1 : index) stride(%16#2 : index) start_idx(%15#0 : index) {stride_in_bytes = true}
+  %19 = fir.box_offset %3#1 base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>
+  %20 = omp.map.info var_ptr(%3#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.array<?xf64>) var_ptr_ptr(%19 : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%18) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>> {name = ""}
+  %21 = omp.map.info var_ptr(%3#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.box<!fir.heap<!fir.array<?xf64>>>) map_clauses(implicit, tofrom) capture(ByRef) members(%20 : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>> {name = "b"}
+  omp.target map_entries(%11 -> %arg0, %12 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) {
+    %22:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+    %23:2 = hlfir.declare %arg3 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>)
+    %24 = fir.load %23#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+    %c0_6 = arith.constant 0 : index
+    %25 = hlfir.designate %24 (%c0_6) {test.ptr = "targetArrayB"} : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+    %26 = fir.load %25 : !fir.ref<f64>
+    %27 = fir.load %22#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>>
+    %c0_7 = arith.constant 0 : index
+    %28 = hlfir.designate %27 (%c0_7) {test.ptr = "targetArrayA"} : (!fir.box<!fir.heap<!fir.array<?xf64>>>, index) -> !fir.ref<f64>
+    hlfir.assign %26 to %28 : f64, !fir.ref<f64>
+    omp.terminator
+  }
+  return
+}
+fir.global internal @_QFEa : !fir.box<!fir.heap<!fir.array<?xf64>>> {
+}
+fir.global internal @_QFEb : !fir.box<!fir.heap<!fir.array<?xf64>>> {
+}
diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir
new file mode 100644
index 00000000000000..c6b2e29a7188a9
--- /dev/null
+++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir
@@ -0,0 +1,96 @@
+// Use --mlir-disable-threading so that the AA queries are serialized
+// as well as its diagnostic output.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+// Fortran source code:
+//
+// subroutine TestTargetData(p, a, b)
+//   real    ::  p(10), a(10), b(10)
+//   !$omp target data map(from: p)
+//      !$omp target map(to: a )
+//            p(1) = a(1)
+//      !$omp end target
+//      !$omp target map(to: b )
+//            p(1) = b(1)
+//      !$omp end target
+//   !$omp end target data
+// end subroutine
+
+// CHECK-LABEL: Testing : "_QPTestTargetData"
+
+// CHECK-DAG: targetArrayA#0 <-> targetArrayP#0: NoAlias
+// CHECK-DAG: targetArrayA#0 <-> targetArrayB#0: NoAlias
+// CHECK-DAG: targetArrayP#0 <-> targetArrayB#0: NoAlias
+
+func.func @_QPTestTargetData(%arg0: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "p"}, %arg1: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "a"}, %arg2: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "b"}) {
+  %0 = fir.dummy_scope : !fir.dscope
+  %c10 = arith.constant 10 : index
+  %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %2:2 = hlfir.declare %arg1(%1) dummy_scope %0 {uniq_name = "_QFtest_target_dataEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+  %c10_0 = arith.constant 10 : index
+  %3 = fir.shape %c10_0 : (index) -> !fir.shape<1>
+  %4:2 = hlfir.declare %arg2(%3) dummy_scope %0 {uniq_name = "_QFtest_target_dataEb"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+  %c10_1 = arith.constant 10 : index
+  %5 = fir.shape %c10_1 : (index) -> !fir.shape<1>
+  %6:2 = hlfir.declare %arg0(%5) dummy_scope %0 {uniq_name = "_QFtest_target_dataEp"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %7 = arith.subi %c10_1, %c1 : index
+  %8 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%7 : index) extent(%c10_1 : index) stride(%c1 : index) start_idx(%c1 : index)
+  %9 = omp.map.info var_ptr(%6#1 : !fir.ref<!fir.array<10xf32>>, !fir.array<10xf32>) map_clauses(from) capture(ByRef) bounds(%8) -> !fir.ref<!fir.array<10xf32>> {name = "p"}
+  omp.target_data map_entries(%9 : !fir.ref<!fir.array<10xf32>>) {
+    %c1_2 = arith.constant 1 : index
+    %c0_3 = arith.constant 0 : index
+    %10 = arith.subi %c10, %c1_2 : index
+    %11 = omp.map.bounds lower_bound(%c0_3 : index) upper_bound(%10 : index) extent(%c10 : index) stride(%c1_2 : index) start_idx(%c1_2 : index)
+    %12 = omp.map.info var_ptr(%2#1 : !fir.ref<!fir.array<10xf32>>, !fir.array<10xf32>) map_clauses(to) capture(ByRef) bounds(%11) -> !fir.ref<!fir.array<10xf32>> {name = "a"}
+    %c1_4 = arith.constant 1 : index
+    %c0_5 = arith.constant 0 : index
+    %13 = arith.subi %c10_1, %c1_4 : index
+    %14 = omp.map.bounds lower_bound(%c0_5 : index) upper_bound(%13 : index) extent(%c10_1 : index) stride(%c1_4 : index) start_idx(%c1_4 : index)
+    %15 = omp.map.info var_ptr(%6#1 : !fir.ref<!fir.array<10xf32>>, !fir.array<10xf32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%14) -> !fir.ref<!fir.array<10xf32>> {name = "p"}
+    omp.target map_entries(%12 -> %arg3, %15 -> %arg4 : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+      %c10_10 = arith.constant 10 : index
+      %22 = fir.shape %c10_10 : (index) -> !fir.shape<1>
+      %23:2 = hlfir.declare %arg3(%22) {uniq_name = "_QFtest_target_dataEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+      %c10_11 = arith.constant 10 : index
+      %24 = fir.shape %c10_11 : (index) -> !fir.shape<1>
+      %25:2 = hlfir.declare %arg4(%24) {uniq_name = "_QFtest_target_dataEp"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+      %c1_12 = arith.constant 1 : index
+      %26 = hlfir.designate %23#0 (%c1_12) {test.ptr = "targetArrayA"}  : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      %27 = fir.load %26 : !fir.ref<f32>
+      %c1_13 = arith.constant 1 : index
+      %28 = hlfir.designate %25#0 (%c1_13) {test.ptr = "targetArrayP"}  : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      hlfir.assign %27 to %28 : f32, !fir.ref<f32>
+      omp.terminator
+    }
+    %c1_6 = arith.constant 1 : index
+    %c0_7 = arith.constant 0 : index
+    %16 = arith.subi %c10_0, %c1_6 : index
+    %17 = omp.map.bounds lower_bound(%c0_7 : index) upper_bound(%16 : index) extent(%c10_0 : index) stride(%c1_6 : index) start_idx(%c1_6 : index)
+    %18 = omp.map.info var_ptr(%4#1 : !fir.ref<!fir.array<10xf32>>, !fir.array<10xf32>) map_clauses(to) capture(ByRef) bounds(%17) -> !fir.ref<!fir.array<10xf32>> {name = "b"}
+    %c1_8 = arith.constant 1 : index
+    %c0_9 = arith.constant 0 : index
+    %19 = arith.subi %c10_1, %c1_8 : index
+    %20 = omp.map.bounds lower_bound(%c0_9 : index) upper_bound(%19 : index) extent(%c10_1 : index) stride(%c1_8 : index) start_idx(%c1_8 : index)
+    %21 = omp.map.info var_ptr(%6#1 : !fir.ref<!fir.array<10xf32>>, !fir.array<10xf32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%20) -> !fir.ref<!fir.array<10xf32>> {name = "p"}
+    omp.target map_entries(%18 -> %arg3, %21 -> %arg4 : !fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>) {
+      %c10_10 = arith.constant 10 : index
+      %22 = fir.shape %c10_10 : (index) -> !fir.shape<1>
+      %23:2 = hlfir.declare %arg3(%22) {uniq_name = "_QFtest_target_dataEb"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+      %c10_11 = arith.constant 10 : index
+      %24 = fir.shape %c10_11 : (index) -> !fir.shape<1>
+      %25:2 = hlfir.declare %arg4(%24) {uniq_name = "_QFtest_target_dataEp"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+      %c1_12 = arith.constant 1 : index
+      %26 = hlfir.designate %23#0 (%c1_12) {test.ptr = "targetArrayB"}  : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      %27 = fir.load %26 : !fir.ref<f32>
+      %c1_13 = arith.constant 1 : index
+      %28 = hlfir.designate %25#0 (%c1_13) {test.ptr = "targetArrayP"}  : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      hlfir.assign %27 to %28 : f32, !fir.ref<f32>
+      omp.terminator
+    }
+    omp.terminator
+  }
+  return
+}
+