diff options
Diffstat (limited to 'liboffloadmic/runtime/cean_util.cpp')
-rw-r--r-- | liboffloadmic/runtime/cean_util.cpp | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/liboffloadmic/runtime/cean_util.cpp b/liboffloadmic/runtime/cean_util.cpp new file mode 100644 index 000000000000..3258d7f3ade7 --- /dev/null +++ b/liboffloadmic/runtime/cean_util.cpp @@ -0,0 +1,366 @@ +/* + Copyright (c) 2014 Intel Corporation. All Rights Reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include "cean_util.h" +#include "offload_common.h" + +// 1. allocate element of CeanReadRanges type +// 2. initialized it for reading consequently contiguous ranges +// described by "ap" argument +CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap) +{ + CeanReadRanges * res; + + // find the max contiguous range + int64_t rank = ap->rank - 1; + int64_t length = ap->dim[rank].size; + for (; rank >= 0; rank--) { + if (ap->dim[rank].stride == 1) { + length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); + if (rank > 0 && length != ap->dim[rank - 1].size) { + break; + } + } + else { + break; + } + } + + res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) + + (ap->rank - rank) * sizeof(CeanReadDim)); + if (res == NULL) + LIBOFFLOAD_ERROR(c_malloc); + res->current_number = 0; + res->range_size = length; + res->last_noncont_ind = rank; + + // calculate number of contiguous ranges inside noncontiguous dimensions + int count = 1; + bool prev_is_cont = true; + int64_t offset = 0; + + for (; rank >= 0; rank--) { + res->Dim[rank].count = count; + res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size; + count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 : + (ap->dim[rank].upper - ap->dim[rank].lower + + ap->dim[rank].stride) / ap->dim[rank].stride); + prev_is_cont = false; + offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) * + ap->dim[rank].size; + } + res->range_max_number = count; + res -> ptr = (void*)ap->base; + res -> init_offset = offset; + return res; +} + +// check if ranges described by 1 argument could be transfered into ranges +// described by 2-nd one +bool cean_ranges_match( + CeanReadRanges * read_rng1, + CeanReadRanges * read_rng2 +) +{ + return ( read_rng1 == NULL || read_rng2 == NULL || + (read_rng1->range_size % read_rng2->range_size == 0 || + read_rng2->range_size % read_rng1->range_size == 0)); +} + +// Set next offset and length and returns true for next range. +// Returns false if the ranges are over. +bool get_next_range( + CeanReadRanges * read_rng, + int64_t *offset +) +{ + if (++read_rng->current_number > read_rng->range_max_number) { + read_rng->current_number = 0; + return false; + } + int rank = 0; + int num = read_rng->current_number - 1; + int64_t cur_offset = 0; + int num_loc; + for (; rank <= read_rng->last_noncont_ind; rank++) { + num_loc = num / read_rng->Dim[rank].count; + cur_offset += num_loc * read_rng->Dim[rank].size; + num = num % read_rng->Dim[rank].count; + } + *offset = cur_offset + read_rng->init_offset; + return true; +} + +bool is_arr_desc_contiguous(const arr_desc *ap) +{ + int64_t rank = ap->rank - 1; + int64_t length = ap->dim[rank].size; + for (; rank >= 0; rank--) { + if (ap->dim[rank].stride > 1 && + ap->dim[rank].upper - ap->dim[rank].lower != 0) { + return false; + } + else if (length != ap->dim[rank].size) { + for (; rank >= 0; rank--) { + if (ap->dim[rank].upper - ap->dim[rank].lower != 0) { + return false; + } + } + return true; + } + length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); + } + return true; +} + +int64_t cean_get_transf_size(CeanReadRanges * read_rng) +{ + return(read_rng->range_max_number * read_rng->range_size); +} + +static uint64_t last_left, last_right; +typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize); + +static void generate_one_range( + const char *spaces, + uint64_t lrange, + uint64_t rrange, + fpp fp, + int esize +) +{ + OFFLOAD_TRACE(3, + "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n", + spaces, (void*)lrange, (void*)rrange, esize); + if (last_left == -1) { + // First range + last_left = lrange; + } + else { + if (lrange == last_right+1) { + // Extend previous range, don't print + } + else { + (*fp)(spaces, last_left, last_right, esize); + last_left = lrange; + } + } + last_right = rrange; +} + +static void generate_mem_ranges_one_rank( + const char *spaces, + uint64_t base, + uint64_t rank, + const struct dim_desc *ddp, + fpp fp, + int esize +) +{ + uint64_t lindex = ddp->lindex; + uint64_t lower = ddp->lower; + uint64_t upper = ddp->upper; + uint64_t stride = ddp->stride; + uint64_t size = ddp->size; + OFFLOAD_TRACE(3, + "%s " + "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, " + "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n", + spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize); + if (rank == 1) { + uint64_t lrange, rrange; + if (stride == 1) { + lrange = base + (lower-lindex)*size; + rrange = lrange + (upper-lower+1)*size - 1; + generate_one_range(spaces, lrange, rrange, fp, esize); + } + else { + for (int i=lower-lindex; i<=upper-lindex; i+=stride) { + lrange = base + i*size; + rrange = lrange + size - 1; + generate_one_range(spaces, lrange, rrange, fp, esize); + } + } + } + else { + for (int i=lower-lindex; i<=upper-lindex; i+=stride) { + generate_mem_ranges_one_rank( + spaces, base+i*size, rank-1, ddp+1, fp, esize); + + } + } +} + +static void generate_mem_ranges( + const char *spaces, + const arr_desc *adp, + bool deref, + fpp fp +) +{ + uint64_t esize; + + OFFLOAD_TRACE(3, + "%s " + "generate_mem_ranges(adp=%p, deref=%d, fp)\n", + spaces, adp, deref); + last_left = -1; + last_right = -2; + + // Element size is derived from last dimension + esize = adp->dim[adp->rank-1].size; + + generate_mem_ranges_one_rank( + // For c_cean_var the base addr is the address of the data + // For c_cean_var_ptr the base addr is dereferenced to get to the data + spaces, deref ? *((uint64_t*)(adp->base)) : adp->base, + adp->rank, &adp->dim[0], fp, esize); + (*fp)(spaces, last_left, last_right, esize); +} + +// returns offset and length of the data to be transferred +void __arr_data_offset_and_length( + const arr_desc *adp, + int64_t &offset, + int64_t &length +) +{ + int64_t rank = adp->rank - 1; + int64_t size = adp->dim[rank].size; + int64_t r_off = 0; // offset from right boundary + + // find the rightmost dimension which takes just part of its + // range. We define it if the size of left rank is not equal + // the range's length between upper and lower boungaries + while (rank > 0) { + size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1); + if (size != adp->dim[rank - 1].size) { + break; + } + rank--; + } + + offset = (adp->dim[rank].lower - adp->dim[rank].lindex) * + adp->dim[rank].size; + + // find gaps both from the left - offset and from the right - r_off + for (rank--; rank >= 0; rank--) { + offset += (adp->dim[rank].lower - adp->dim[rank].lindex) * + adp->dim[rank].size; + r_off += adp->dim[rank].size - + (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) * + adp->dim[rank + 1].size; + } + length = (adp->dim[0].upper - adp->dim[0].lindex + 1) * + adp->dim[0].size - offset - r_off; +} + +#if OFFLOAD_DEBUG > 0 + +void print_range( + const char *spaces, + uint64_t low, + uint64_t high, + int esize +) +{ + char buffer[1024]; + char number[32]; + + OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n", + spaces, (void*)low, (void*)high, esize); + + if (console_enabled < 4) { + return; + } + OFFLOAD_TRACE(4, "%s values:\n", spaces); + int count = 0; + buffer[0] = '\0'; + while (low <= high) + { + switch (esize) + { + case 1: + sprintf(number, "%d ", *((char *)low)); + low += 1; + break; + case 2: + sprintf(number, "%d ", *((short *)low)); + low += 2; + break; + case 4: + sprintf(number, "%d ", *((int *)low)); + low += 4; + break; + default: + sprintf(number, "0x%016x ", *((uint64_t *)low)); + low += 8; + break; + } + strcat(buffer, number); + count++; + if (count == 10) { + OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); + count = 0; + buffer[0] = '\0'; + } + } + if (count != 0) { + OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer); + } +} + +void __arr_desc_dump( + const char *spaces, + const char *name, + const arr_desc *adp, + bool deref +) +{ + OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp); + + if (adp != 0) { + OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n", + spaces, adp->base, adp->rank); + + for (int i = 0; i < adp->rank; i++) { + OFFLOAD_TRACE(2, + "%s dimension %d: size=%lld, lindex=%lld, " + "lower=%lld, upper=%lld, stride=%lld\n", + spaces, i, adp->dim[i].size, adp->dim[i].lindex, + adp->dim[i].lower, adp->dim[i].upper, + adp->dim[i].stride); + } + // For c_cean_var the base addr is the address of the data + // For c_cean_var_ptr the base addr is dereferenced to get to the data + generate_mem_ranges(spaces, adp, deref, &print_range); + } +} +#endif // OFFLOAD_DEBUG |