[Libclc-dev] [PATCH v2 1/1] Implement generic mad_sat

Aaron Watry awatry at gmail.com
Tue Aug 5 13:06:06 PDT 2014


On Tue, Aug 5, 2014 at 2:58 PM, Matt Arsenault
<Matthew.Arsenault at amd.com> wrote:
> On 08/05/2014 12:51 PM, Aaron Watry wrote:
>>
>> Either way, I've successfully tested this version of the code with your
>> LLVM FlattenCFG.cpp patch and gotten successful unit test passes on CEDAR
>> (Radeon 5400). I believe that radeonsi will probably still fail due to the
>> ulong instruction selection issue that I noted yesterday
>
> What operation is not selecting? I thought most of those were taken care of
> already

I've attached the bitcode and resulting LLVM Error from the mad_sat
ulong2 test kernel.

The kernel source is:
// Reproducer kernel: reads one 2-element vector from the start of each input
// buffer (vload2 at offset 0), applies mad_sat component-wise to the three
// vectors, and writes the 2-element result to the start of out (vstore2 at
// offset 0).
kernel void test_2_mad_sat_ulong(global ulong* out, global ulong* in0,
global ulong* in1, global ulong* in2){
  vstore2(mad_sat(vload2(0, in0), vload2(0, in1), vload2(0, in2)), 0, out);
}

Note that mad_sat itself is probably fine; the failure is more likely
generated by the mul_hi and/or add_sat call that is embedded in
mad_sat.
-------------- next part --------------
; ModuleID = 'radeon'
; Data layout fields relevant to the function below (per the LLVM LangRef
; data-layout grammar):
;   e          little-endian
;   p:32:32    pointers in the default address space are 32 bits wide
;   p1:64:64   pointers in addrspace(1) -- the address space of every kernel
;              argument in this module -- are 64 bits wide
;   i64:64     i64 is 64-bit aligned
;   n32:64     native integer widths are 32 and 64 bits
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; Target triple: architecture component "r600", vendor and OS left empty.
target triple = "r600--"

; test_2_mad_sat_ulong(out, in0, in1, in2)
;
; Two independent 64-bit lanes (element 0 and element 1 of each buffer) are
; processed one after the other. For each lane i the code:
;   1. evaluates a test built from the 32-bit halves of in0[i] and in1[i];
;   2. if the test is true, computes in0[i] * in1[i] (wrapping i64 mul) and adds
;      in2[i] via llvm.uadd.with.overflow, selecting -1 (all ones) when the add
;      reports overflow; if the test is false the lane result is -1.
; The lane results are stored to out[0] and out[1].
; All four pointer arguments are in addrspace(1); in0..in2 are readonly.
;
; NOTE(review): the test in step 1 looks like an inlined and algebraically
; rewritten "mul_hi(a, b) == 0" check -- confirm against the libclc mad_sat /
; mul_hi / hadd sources, which are not part of this dump.
; Function Attrs: nounwind
define void @test_2_mad_sat_ulong(i64 addrspace(1)* nocapture %out, i64 addrspace(1)* nocapture readonly %in0, i64 addrspace(1)* nocapture readonly %in1, i64 addrspace(1)* nocapture readonly %in2) #0 {
  ; Entry block: load both lanes of in0 (%1, %3) and in1 (%4, %6). Only lane 1
  ; of in2 (%8) is loaded here; lane 0 of in2 is loaded later, in block 28.
  %1 = load i64 addrspace(1)* %in0, align 8, !tbaa !6
  %2 = getelementptr inbounds i64 addrspace(1)* %in0, i64 1
  %3 = load i64 addrspace(1)* %2, align 8, !tbaa !6
  %4 = load i64 addrspace(1)* %in1, align 8, !tbaa !6
  %5 = getelementptr inbounds i64 addrspace(1)* %in1, i64 1
  %6 = load i64 addrspace(1)* %5, align 8, !tbaa !6
  %7 = getelementptr inbounds i64 addrspace(1)* %in2, i64 1
  %8 = load i64 addrspace(1)* %7, align 8, !tbaa !6
  ; ---- Lane 0 test ----
  ; Split in0[0] (%1) and in1[0] (%4) into high (%9, %11) and low (%10, %12)
  ; 32-bit halves, each held zero-extended in an i64.
  %9 = lshr i64 %1, 32
  %10 = and i64 %1, 4294967295
  %11 = lshr i64 %4, 32
  %12 = and i64 %4, 4294967295
  ; Partial products: %13 = lo(in1)*hi(in0), %14 = hi(in1)*lo(in0),
  ; %15 = lo(in1)*lo(in0).
  %13 = mul i64 %12, %9
  %14 = mul i64 %11, %10
  %15 = mul i64 %12, %10
  ; %17 = (lo*lo >> 32) + hi(in1)*lo(in0)
  %16 = lshr i64 %15, 32
  %17 = add i64 %16, %14
  ; %23 = (%13 >> 1) + (%17 >> 1) + (%13 & %17 & 1), i.e. floor((%13 + %17) / 2)
  ; computed without needing a 65-bit intermediate sum.
  %18 = lshr i64 %13, 1
  %19 = lshr i64 %17, 1
  %20 = add nuw i64 %19, %18
  %21 = and i64 %13, 1
  %22 = and i64 %21, %17
  %23 = add i64 %20, %22
  ; %24 = %23 >> 31, which equals floor((%13 + %17) / 2^32) for the exact sum.
  %24 = lshr i64 %23, 31
  ; %26 = hi(in1) * (0 - hi(in0)) = -(hi(in1) * hi(in0)) modulo 2^64.
  %25 = sub i64 0, %9
  %26 = mul i64 %11, %25
  ; True exactly when hi(in1)*hi(in0) + %24 is 0 modulo 2^64.
  %27 = icmp eq i64 %24, %26
  ; True -> block 28 computes the lane; false -> lane 0 result is -1 (see phi).
  br i1 %27, label %28, label %_Z7mad_satmmm.exit.i

; <label>:28                                      ; preds = %0
  ; Lane 0 compute path: in1[0] * in0[0] + in2[0], replaced by -1 when the
  ; unsigned add reports overflow.
  %29 = load i64 addrspace(1)* %in2, align 8, !tbaa !6
  %30 = mul i64 %4, %1
  %call.i.i.i1.i = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %30, i64 %29) #2
  %res.i.i.i2.i = extractvalue { i64, i1 } %call.i.i.i1.i, 0
  %over.i.i.i3.i = extractvalue { i64, i1 } %call.i.i.i1.i, 1
  %sat.i.i.i4.i = select i1 %over.i.i.i3.i, i64 -1, i64 %res.i.i.i2.i
  br label %_Z7mad_satmmm.exit.i

_Z7mad_satmmm.exit.i:                             ; preds = %28, %0
  ; Lane 0 result: the value from block 28, or -1 when arriving straight from
  ; the entry block.
  %.0.i.i = phi i64 [ %sat.i.i.i4.i, %28 ], [ -1, %0 ]
  ; ---- Lane 1 test ----
  ; Same instruction sequence as lane 0, applied to in0[1] (%3) and in1[1] (%6).
  %31 = lshr i64 %3, 32
  %32 = and i64 %3, 4294967295
  %33 = lshr i64 %6, 32
  %34 = and i64 %6, 4294967295
  ; Partial products: %35 = lo(in1)*hi(in0), %36 = hi(in1)*lo(in0),
  ; %37 = lo(in1)*lo(in0).
  %35 = mul i64 %34, %31
  %36 = mul i64 %33, %32
  %37 = mul i64 %34, %32
  %38 = lshr i64 %37, 32
  %39 = add i64 %38, %36
  ; %45 = floor((%35 + %39) / 2), built from halves plus the shared low bit.
  %40 = lshr i64 %35, 1
  %41 = lshr i64 %39, 1
  %42 = add nuw i64 %41, %40
  %43 = and i64 %35, 1
  %44 = and i64 %43, %39
  %45 = add i64 %42, %44
  %46 = lshr i64 %45, 31
  ; %48 = -(hi(in1) * hi(in0)) modulo 2^64.
  %47 = sub i64 0, %31
  %48 = mul i64 %33, %47
  %49 = icmp eq i64 %46, %48
  ; True -> block 50 computes the lane; false -> lane 1 result is -1 (see phi).
  br i1 %49, label %50, label %_Z7mad_satDv2_mS_S_.exit

; <label>:50                                      ; preds = %_Z7mad_satmmm.exit.i
  ; Lane 1 compute path: in1[1] * in0[1] + in2[1] (%8, loaded in the entry
  ; block), replaced by -1 when the unsigned add reports overflow.
  %51 = mul i64 %6, %3
  %call.i.i.i.i = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %51, i64 %8) #2
  %res.i.i.i.i = extractvalue { i64, i1 } %call.i.i.i.i, 0
  %over.i.i.i.i = extractvalue { i64, i1 } %call.i.i.i.i, 1
  %sat.i.i.i.i = select i1 %over.i.i.i.i, i64 -1, i64 %res.i.i.i.i
  br label %_Z7mad_satDv2_mS_S_.exit

_Z7mad_satDv2_mS_S_.exit:                         ; preds = %50, %_Z7mad_satmmm.exit.i
  ; Lane 1 result: the value from block 50, or -1 when block 50 was skipped.
  %.0.i1.i = phi i64 [ %sat.i.i.i.i, %50 ], [ -1, %_Z7mad_satmmm.exit.i ]
  ; Write lane 0 to out[0] and lane 1 to out[1].
  store i64 %.0.i.i, i64 addrspace(1)* %out, align 8, !tbaa !6
  %52 = getelementptr inbounds i64 addrspace(1)* %out, i64 1
  store i64 %.0.i1.i, i64 addrspace(1)* %52, align 8, !tbaa !6
  ret void
}

; Unsigned add-with-overflow intrinsic on i64. It returns a { i64, i1 } pair;
; the callers in this module read field 0 as the sum and field 1 as the
; overflow flag.
; Function Attrs: nounwind readnone
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1

; Attribute groups: #0 is attached to the @test_2_mad_sat_ulong definition,
; #1 to the intrinsic declaration above, and #2 to the two intrinsic call sites.
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }

!opencl.kernels = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5}

; !0..!4 are the entries listed by !opencl.kernels. Only !1 references a
; function (@test_2_mad_sat_ulong); the other four entries hold null.
!0 = metadata !{null}
!1 = metadata !{void (i64 addrspace(1)*, i64 addrspace(1)*, i64 addrspace(1)*, i64 addrspace(1)*)* @test_2_mad_sat_ulong}
!2 = metadata !{null}
!3 = metadata !{null}
!4 = metadata !{null}
; !5 is the producer string referenced by !llvm.ident.
!5 = metadata !{metadata !"clang version 3.6.0 (trunk 214662) (llvm/trunk 214660)"}
; !6 is the !tbaa tag attached to every load and store in the kernel. It refers
; to node !7 ("long"), which references !8 ("omnipotent char"), which in turn
; references !9 ("Simple C/C++ TBAA").
!6 = metadata !{metadata !7, metadata !7, i64 0}
!7 = metadata !{metadata !"long", metadata !8, i64 0}
!8 = metadata !{metadata !"omnipotent char", metadata !9, i64 0}
!9 = metadata !{metadata !"Simple C/C++ TBAA"}
LLVM ERROR: Cannot select: 0xc45010: i64 = and 0xc45430, 0xc46440 [ORD=16] [ID=62]
  0xc45430: i64 = any_extend 0xc45220 [ORD=15] [ID=61]
    0xc45220: i32 = and 0xc741b8, 0x164e968 [ORD=15] [ID=59]
      0xc741b8: i32 = mul 0xc3f4d0, 0xc745d8 [ORD=7] [ID=32]
        0xc3f4d0: i32 = truncate 0x1651250 [ORD=7] [ID=24]
          0x1651250: i64,ch = CopyFromReg 0xbe4730, 0xc41238 [ORD=5] [ID=19]
            0xc41238: i64 = Register %vreg3 [ID=5]
        0xc745d8: i32 = truncate 0xc41028 [ORD=7] [ID=27]
          0xc41028: i64 = srl 0x1650358, 0x1650250 [ORD=3] [ID=23]
            0x1650358: i64,ch = CopyFromReg 0xbe4730, 0x1650040 [ORD=3] [ID=18]
              0x1650040: i64 = Register %vreg1 [ID=3]
            0x1650250: i32 = Constant<32> [ID=4]
      0x164e968: i32 = truncate 0x164ec10 [ORD=15] [ID=56]
        0x164ec10: i64 = add 0x1651988, 0xc73c90 [ORD=11] [ID=54]
          0x1651988: i64 = zero_extend 0xc742c0 [ORD=10] [ID=35]
            0xc742c0: i32 = mulhu 0xc3f4d0, 0xc42140 [ORD=9] [ID=29]
              0xc3f4d0: i32 = truncate 0x1651250 [ORD=7] [ID=24]
                0x1651250: i64,ch = CopyFromReg 0xbe4730, 0xc41238 [ORD=5] [ID=19]
                  0xc41238: i64 = Register %vreg3 [ID=5]
              0xc42140: i32 = truncate 0x1650358 [ORD=8] [ID=22]
                0x1650358: i64,ch = CopyFromReg 0xbe4730, 0x1650040 [ORD=3] [ID=18]
                  0x1650040: i64 = Register %vreg1 [ID=3]
          0xc73c90: i64 = or 0xc46020, 0xc46758 [ORD=8] [ID=51]
            0xc46020: i64 = zero_extend 0xc46650 [ORD=8] [ID=44]
              0xc46650: i32 = mul 0xc42560, 0xc42140 [ORD=8] [ID=37]
                0xc42560: i32 = truncate 0xc40f20 [ORD=20] [ID=30]
                  0xc40f20: i64 = srl 0x1651250, 0x1650250 [ORD=5] [ID=25]


                0xc42140: i32 = truncate 0x1650358 [ORD=8] [ID=22]
                  0x1650358: i64,ch = CopyFromReg 0xbe4730, 0x1650040 [ORD=3] [ID=18]

            0xc46758: i64 = shl 0xc46230, 0x1650250 [ORD=8] [ID=48]
              0xc46230: i64 = any_extend 0xc45958 [ORD=8] [ID=43]
                0xc45958: i32 = mulhu 0xc42560, 0xc42140 [ORD=8] [ID=36]
                  0xc42560: i32 = truncate 0xc40f20 [ORD=20] [ID=30]

                  0xc42140: i32 = truncate 0x1650358 [ORD=8] [ID=22]

              0x1650250: i32 = Constant<32> [ID=4]
  0xc46440: i64 = Constant<1> [ID=6]
In function: test_2_mad_sat_ulong



More information about the Libclc-dev mailing list