<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/56156">56156</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Commit that adds Float16 type support causes correctness error downstream
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
            phoebewang
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          MaheshRavishankar
      </td>
    </tr>
</table>

<pre>
Commit https://github.com/llvm/llvm-project/commit/655ba9c8a1d22075443711cc749f0b032e07adee was found to be the root cause of a correctness error when IREE was rebased on top of the LLVM tree. The error shows up in an fp16 test compiled for x86. In MLIR, the test is specified as follows:
```
func.func @tensor_fp16() {
  %input = util.unfoldable_constant dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf16>
  %result = "mhlo.exponential"(%input) : (tensor<4xf16>) -> tensor<4xf16>
  check.expect_almost_eq_const(%result, dense<[1.0, 2.7183, 7.3891, 54.5981]> : tensor<4xf16>) : tensor<4xf16>
  return
}
```
This is translated to the following LLVM IR.
```
; ModuleID = '_tensor_fp16_dispatch_0'                                                               
source_filename = "_tensor_fp16_dispatch_0"                                                          
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"                                                                                                               
target triple = "x86_64-unknown-unknown-eabi-elf"                                                                                                                                                          
                                                                                                     
%iree_hal_executable_environment_v0_t = type { i32*, i32 (i32 (i8*)*, i8*)*, i32 (i8*)**, %iree_hal_processor_v0_t }
%iree_hal_processor_v0_t = type { [8 x i64] }                                                                                                                                                              
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, i32*, i8**, i64* }                                                                                            
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, i8**, i32 }
                                                                                                                                                                                                           
declare i8* @malloc(i64 %0)                                                                          
                                                  
declare void @free(i8* %0)                                                                                                                                                                                 
                                                  
define internal i32 @_tensor_fp16_dispatch_0(%iree_hal_executable_environment_v0_t* noalias align 16 %0, %iree_hal_executable_dispatch_state_v0_t* noalias align 16 %1, %iree_hal_executable_workgroup_state_v0_t* noalias align 16 %2) !dbg !3 {
  %4 = load %iree_hal_executable_dispatch_state_v0_t, %iree_hal_executable_dispatch_state_v0_t* %1, align 8, !dbg !7
  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10, !dbg !7                         
  %6 = load i8*, i8** %5, align 8, !dbg !7                                                           
  %7 = bitcast i8* %6 to half*, !dbg !7                                                                                                                                                                    
  %8 = ptrtoint half* %7 to i64, !dbg !7                                                             
  %9 = and i64 %8, 63, !dbg !7                                                                                                                                                                             
  %10 = icmp eq i64 %9, 0, !dbg !7                                                                                                                                                                         
  call void @llvm.assume(i1 %10), !dbg !7                                                            
  %11 = load %iree_hal_executable_dispatch_state_v0_t, %iree_hal_executable_dispatch_state_v0_t* %1, align 8, !dbg !9                                                                                      
  %12 = extractvalue %iree_hal_executable_dispatch_state_v0_t %11, 10, !dbg !9
  %13 = getelementptr i8*, i8** %12, i64 1, !dbg !9
  %14 = load i8*, i8** %13, align 8, !dbg !9
  %15 = bitcast i8* %14 to half*, !dbg !9
  %16 = ptrtoint half* %15 to i64, !dbg !9
  %17 = and i64 %16, 63, !dbg !9
  %18 = icmp eq i64 %17, 0, !dbg !9
  call void @llvm.assume(i1 %18), !dbg !9
  %19 = bitcast half* %7 to <4 x half>*, !dbg !9
  %20 = load <4 x half>, <4 x half>* %19, align 2, !dbg !9
  %21 = call <4 x half> @llvm.exp.v4f16(<4 x half> %20), !dbg !9
  %22 = bitcast half* %15 to <4 x half>*, !dbg !9
  store <4 x half> %21, <4 x half>* %22, align 2, !dbg !9
  ret i32 0, !dbg !9
}

; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn
declare void @llvm.assume(i1 noundef %0) #0
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare <4 x half> @llvm.exp.v4f16(<4 x half> %0) #1

attributes #0 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}

!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "mlir", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "_tensor_fp16_dispatch_0", directory: "/")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = distinct !DISubprogram(name: "_tensor_fp16_dispatch_0", linkageName: "_tensor_fp16_dispatch_0", scope: null, file: !4, line: 3, type: !5, scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !6)
!4 = !DIFile(filename: "iree/tests/e2e/xla_ops/exponential_fp16.mlir", directory: "/usr/local/google/home/ravishankarm/iree")
!5 = !DISubroutineType(types: !6)
!6 = !{}
!7 = !DILocation(line: 2, column: 12, scope: !8)
!8 = !DILexicalBlockFile(scope: !3, file: !4, discriminator: 0)
!9 = !DILocation(line: 3, column: 13, scope: !8)
```
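
For reference, the core operation the backend has to lower here is the `llvm.exp.v4f16` intrinsic on a `<4 x half>` value loaded from and stored back to memory. A minimal standalone sketch of that, reduced by hand from the module above (the function name and pointer arguments are made up for illustration and are not part of the original test), would look like the following and could be fed to `llc -mtriple=x86_64-unknown-unknown` before and after the commit to compare the generated code:
```
; Hypothetical reduced reproducer (not part of the original report):
; keeps only the vector fp16 exp call from the dispatch function above.
target triple = "x86_64-unknown-unknown-eabi-elf"

declare <4 x half> @llvm.exp.v4f16(<4 x half>)

define void @exp_v4f16(half* %src, half* %dst) {
  %src.vec = bitcast half* %src to <4 x half>*
  %in = load <4 x half>, <4 x half>* %src.vec, align 2
  %out = call <4 x half> @llvm.exp.v4f16(<4 x half> %in)
  %dst.vec = bitcast half* %dst to <4 x half>*
  store <4 x half> %out, <4 x half>* %dst.vec, align 2
  ret void
}
```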

With commit https://github.com/llvm/llvm-project/commit/655ba9c8a1d22075443711cc749f0b032e07adee, the test produces the following error:
```
 Failure
Failed
Expected near equality of these values. Contents does not match.
  lhs:
    4xf16=-971 0 0 0
  rhs:
    4xf16=1 2.71875 7.39062 54.5938
```

Without the commit, the test passes.

</pre>