AnsweredAssumed Answered

Generating HSAIL and brig files using HSAIL-HLC tools

Question asked by oamoros on Apr 23, 2015

Hi!

 

I'm trying to generate brig files from cl files that work with the stable version of the HSA runtime.

 

1 Reading on the CLOC web page I understand that version 0.8.xx is still using the previous non-final version of the runtime, so brig file generated with it might not work with the stable-final version of the runtime.

 

2 I understand that this: HSAFoundation/HSAIL-HLC-Stable · GitHub is generating HSAIL and brig files from cl files, but not in a so convenient way as CLOC.

 

Please correct me if I'm wrong on point 1 or 2

 

3 I modified the run_tests script found on the tests folder of HSAIL-HLC-Stable, to compile OpenCL 1.2 files since I have OpenCL 1.2 installed. Specifically, I'm generating brig and hsail files for vector_copy.cl found on CLOC project folders. I pretend that the vector_copy example found on the HSA-Runtime-AMD project folders, execute's using a brig file generated from the cl file. Brig file generated as I explained before.

     - So I have a full vector_copy example with hsail and brig files working with the stable HSA runtime.

     - A vector_copy.cl file and vector_copy.brig and hsail files, generated with HSAIL-HLC-Stable compilers and linkers  

 

The generated hsail file looks quite ok. The HSAIL kernel has two parameters as the cl and original hsail files. I had to change the kernel name on the example HSA host code, to fit the generated kernel. But still, I get a segmentation fault when using this generated brig file.

 

Script generating the hsail and brig files:

 

#!/bin/bash

ROOT=$HOME/GitObsidian/HSAIL-HLC-Stable

PATH=$ROOT/bin:$PATH

echo "path  $PATH"

#echo "`which llc`"

oclf="vector_copy.cl"

fname=`echo "$oclf" | cut -d'.' -f1`

echo " Compiling $fname "

clc  --support_all_extension --opencl=1.2 -o results/$fname.fe.ll $oclf

#clc2 -cl-std=CL2.0 $oclf

#cp $fname.bc results/$fname.fe.bc

#llvm-dis  -o results/$fname.fe.ll results/$fname.fe.bc

echo "FE complete"

llvm-as  -o results/$fname.bc results/$fname.fe.ll

cp results/$fname.bc results/$fname.linked.bc

#llvm-link  -prelink-opt -o results/$fname.linked.bc results/$fname.fe.bc  -l $ROOT/bin/builtins-hsail.bc

echo "llvm-link completed!"

llvm-dis  -o results/$fname.linked.ll results/$fname.linked.bc

opt  -O3 -gpu -whole -verify results/$fname.linked.bc -o results/$fname.opt.bc

echo "opt complete!"

llvm-dis  -o results/$fname.opt.ll results/$fname.opt.bc

llc  -O2 -march=hsail-64 -filetype=obj -o results/$fname.brig  results/$fname.opt.bc

echo "llc complete"

hsailasm  -disassemble -o results/$fname.hsail  results/$fname.brig

echo "HSAIL generated!"

echo ""

 

hsail file generated:

 

version 0:20140528:$full:$large;

extension "amd:gcn";                                                                                                      

extension "IMAGE";

 

decl prog function &get_global_id(arg_u32 %ret)(arg_u32 %arg_p0);

 

decl prog function &abort()();

 

prog kernel &__OpenCL_vector_copy_kernel(                                                                                 

        kernarg_u64 %in,

        kernarg_u64 %out)

{

        pragma  "AMD RTI", "ARGSTART:__OpenCL_vector_copy_kernel";

        pragma  "AMD RTI", "version:3:1:104";

        pragma  "AMD RTI", "device:generic";

        pragma  "AMD RTI", "uniqueid:1024";

        pragma  "AMD RTI", "memory:private:0";

        pragma  "AMD RTI", "memory:region:0";

        pragma  "AMD RTI", "memory:local:0";

        pragma  "AMD RTI", "pointer:in:i32:1:1:0:uav:7:4:RW:0:0";

        pragma  "AMD RTI", "pointer:out:i32:1:1:16:uav:7:4:RW:0:0";                                                       

        pragma  "AMD RTI", "function:1:0";                                                                                

        pragma  "AMD RTI", "memory:64bitABI";                                                                             

        pragma  "AMD RTI", "privateid:8";                                                                                 

        pragma  "AMD RTI", "enqueue_kernel:0";                                                                            

        pragma  "AMD RTI", "kernel_index:4294967295";                                                                     

        pragma  "AMD RTI", "reflection:0:int*";                                                                           

        pragma  "AMD RTI", "reflection:1:int*";                                                                           

        pragma  "AMD RTI", "ARGEND:__OpenCL_vector_copy_kernel";                                                          

 

@__OpenCL_vector_copy_kernel_entry:                                                                                       

        // BB#0:                                // %entry                                                                 

        mov_b32 $s0, 0;

        {

                arg_u32 %get_global_id;                                                                                   

                arg_u32 %__param_p0;

                st_arg_align(4)_u32     $s0, [%__param_p0];                                                               

                call    &get_global_id (%get_global_id) (%__param_p0);                                                    

                ld_arg_align(4)_u32     $s0, [%get_global_id];                                                            

        }

        cvt_s64_s32     $d0, $s0;

        shl_u64 $d0, $d0, 2;                                                                                              

        ld_kernarg_align(8)_width(all)_u64      $d1, [%out];                                                              

        add_u64 $d1, $d1, $d0;

        ld_kernarg_align(8)_width(all)_u64      $d2, [%in];

        add_u64 $d0, $d2, $d0;

        ld_global_align(4)_u32  $s0, [$d0];

        st_global_align(4)_u32  $s0, [$d1];

        ret;

};

 

Original and "working" hsail file:

 

/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.

*

* HSAF is granting you permission to use this software and documentation (if

* any) (collectively, the "Materials") pursuant to the terms and conditions

* of the Software License Agreement included with the Materials.  If you do

* not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.

* Redistribution and use in source and binary forms, with or without

* modification, are permitted provided that the following conditions

* are met:

* 1. Redistributions of source code must retain the above copyright

*    notice, this list of conditions and the following disclaimer.

* 2. Redistributions in binary form must reproduce the above copyright

*    notice, this list of conditions and the following disclaimer in the

*    documentation and/or other materials provided with the distribution

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS

* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE

* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.

*/

 

module &m:1:0:$full:$large:$default;

 

decl prog function &abort()();

 

prog kernel &__vector_copy_kernel(

        kernarg_u64 %a,

        kernarg_u64 %b)

{

@__vector_copy_kernel_entry:

        // BB#0:                                // %entry

        workitemabsid_u32       $s0, 0;

        cvt_s64_s32     $d0, $s0;

        shl_u64 $d0, $d0, 2;

        ld_kernarg_align(8)_width(all)_u64      $d1, [%b];

        add_u64 $d1, $d1, $d0;

        ld_kernarg_align(8)_width(all)_u64      $d2, [%a];

        add_u64 $d0, $d2, $d0;

        ld_global_u32   $s0, [$d0];

        st_global_u32   $s0, [$d1];

        ret;

};

 

There are some differences between the two files, but I understand that the host code should only notice the change on the kernel name, which I already modified on the Host code.

 

When trying to run the generated brig file, I get a segmentation fault after creating the program and trying to add the module to the program:

 

/*

* Create hsa program.

*/

    hsa_ext_program_t program;

    memset(&program,0,sizeof(hsa_ext_program_t));

    err = hsa_ext_program_create(HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, &program);

    check(Create the program, err);

 

/*

* Add the BRIG module to hsa program.

*/

    err = hsa_ext_program_add_module(program, module);

    check(Adding the brig module to the program, err);

 

Questions:

     1 Should all this that I'm trying to do, work at some point?

     2 Is any of the differences on the hsail code requiring a change on the HSA Host code, a part from the kernel name?

     3 Is the program configuration specified at the beginning of the kernel files supposed to match the parameters set on the hsa_ext_program_create() call?

 

Thank you very much.

Outcomes