<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/56156">56156</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Commit that adds Float16 type support causes correctness error downstream
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
            phoebewang
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          MaheshRavishankar
      </td>
    </tr>
</table>

<pre>
Commit https://github.com/llvm/llvm-project/commit/655ba9c8a1d22075443711cc749f0b032e07adee was found to be the root cause of a correctness error when IREE was rebased on top of the LLVM tree. The error shows up in an fp16 test compiled for x86. In MLIR, the test is specified as follows:
```
func.func @tensor_fp16() {
  %input = util.unfoldable_constant dense<[0.0, 1.0, 2.0, 4.0]> : tensor<4xf16>
  %result = "mhlo.exponential"(%input) : (tensor<4xf16>) -> tensor<4xf16>
  check.expect_almost_eq_const(%result, dense<[1.0, 2.7183, 7.3891, 54.5981]> : tensor<4xf16>) : tensor<4xf16>
  return
}
```
This is translated to the following LLVM IR.
```
; ModuleID = '_tensor_fp16_dispatch_0'                                                               
source_filename = "_tensor_fp16_dispatch_0"                                                          
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"                                                                                                               
target triple = "x86_64-unknown-unknown-eabi-elf"                                                                                                                                                          
                                                                                                     
%iree_hal_executable_environment_v0_t = type { i32*, i32 (i32 (i8*)*, i8*)*, i32 (i8*)**, %iree_hal_processor_v0_t }
%iree_hal_processor_v0_t = type { [8 x i64] }                                                                                                                                                              
%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, i32*, i8**, i64* }                                                                                            
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, i8**, i32 }
                                                                                                                                                                                                           
declare i8* @malloc(i64 %0)                                                                          
                                                  
declare void @free(i8* %0)                                                                                                                                                                                 
                                                  
define internal i32 @_tensor_fp16_dispatch_0(%iree_hal_executable_environment_v0_t* noalias align 16 %0, %iree_hal_executable_dispatch_state_v0_t* noalias align 16 %1, %iree_hal_executable_workgroup_state_v0_t* noalias align 16 %2) !dbg !3 {
  %4 = load %iree_hal_executable_dispatch_state_v0_t, %iree_hal_executable_dispatch_state_v0_t* %1, align 8, !dbg !7
  %5 = extractvalue %iree_hal_executable_dispatch_state_v0_t %4, 10, !dbg !7                         
  %6 = load i8*, i8** %5, align 8, !dbg !7                                                           
  %7 = bitcast i8* %6 to half*, !dbg !7                                                                                                                                                                    
  %8 = ptrtoint half* %7 to i64, !dbg !7                                                             
  %9 = and i64 %8, 63, !dbg !7                                                                                                                                                                             
  %10 = icmp eq i64 %9, 0, !dbg !7                                                                                                                                                                         
  call void @llvm.assume(i1 %10), !dbg !7                                                            
  %11 = load %iree_hal_executable_dispatch_state_v0_t, %iree_hal_executable_dispatch_state_v0_t* %1, align 8, !dbg !9                                                                                      
  %12 = extractvalue %iree_hal_executable_dispatch_state_v0_t %11, 10, !dbg !9
  %13 = getelementptr i8*, i8** %12, i64 1, !dbg !9
  %14 = load i8*, i8** %13, align 8, !dbg !9
  %15 = bitcast i8* %14 to half*, !dbg !9
  %16 = ptrtoint half* %15 to i64, !dbg !9
  %17 = and i64 %16, 63, !dbg !9
  %18 = icmp eq i64 %17, 0, !dbg !9
  call void @llvm.assume(i1 %18), !dbg !9
  %19 = bitcast half* %7 to <4 x half>*, !dbg !9
  %20 = load <4 x half>, <4 x half>* %19, align 2, !dbg !9
  %21 = call <4 x half> @llvm.exp.v4f16(<4 x half> %20), !dbg !9
  %22 = bitcast half* %15 to <4 x half>*, !dbg !9
  store <4 x half> %21, <4 x half>* %22, align 2, !dbg !9
  ret i32 0, !dbg !9
}

; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn
declare void @llvm.assume(i1 noundef %0) #0
; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
declare <4 x half> @llvm.exp.v4f16(<4 x half> %0) #1

attributes #0 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2}

!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "mlir", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
!1 = !DIFile(filename: "_tensor_fp16_dispatch_0", directory: "/")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = distinct !DISubprogram(name: "_tensor_fp16_dispatch_0", linkageName: "_tensor_fp16_dispatch_0", scope: null, file: !4, line: 3, type: !5, scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !6)
!4 = !DIFile(filename: "iree/tests/e2e/xla_ops/exponential_fp16.mlir", directory: "/usr/local/google/home/ravishankarm/iree")
!5 = !DISubroutineType(types: !6)
!6 = !{}
!7 = !DILocation(line: 2, column: 12, scope: !8)
!8 = !DILexicalBlockFile(scope: !3, file: !4, discriminator: 0)
!9 = !DILocation(line: 3, column: 13, scope: !8)
```
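
For reference, the core operation the backend has to lower here is the `llvm.exp.v4f16` intrinsic on a `<4 x half>` value loaded from and stored back to memory. A minimal standalone sketch of that, reduced by hand from the module above (the function name and pointer arguments are made up for illustration and are not part of the original test), would look like the following and could be fed to `llc -mtriple=x86_64-unknown-unknown` before and after the commit to compare the generated code:
```
; Hypothetical reduced reproducer (not part of the original report):
; keeps only the vector fp16 exp call from the dispatch function above.
target triple = "x86_64-unknown-unknown-eabi-elf"

declare <4 x half> @llvm.exp.v4f16(<4 x half>)

define void @exp_v4f16(half* %src, half* %dst) {
  %src.vec = bitcast half* %src to <4 x half>*
  %in = load <4 x half>, <4 x half>* %src.vec, align 2
  %out = call <4 x half> @llvm.exp.v4f16(<4 x half> %in)
  %dst.vec = bitcast half* %dst to <4 x half>*
  store <4 x half> %out, <4 x half>* %dst.vec, align 2
  ret void
}
```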

With commit https://github.com/llvm/llvm-project/commit/655ba9c8a1d22075443711cc749f0b032e07adee, the test produces the following error:
```
 Failure
Failed
Expected near equality of these values. Contents does not match.
  lhs:
    4xf16=-971 0 0 0
  rhs:
    4xf16=1 2.71875 7.39062 54.5938
```

Without the commit, the test passes.

</pre>