summaryrefslogtreecommitdiff
path: root/liboffloadmic
diff options
context:
space:
mode:
authoriverbin <iverbin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-09-08 15:39:59 +0000
committeriverbin <iverbin@138bc75d-0d04-0410-961f-82ee72b054a4>2015-09-08 15:39:59 +0000
commit5553d0c3c6e8190b1738d56153b398e0da43fcfa (patch)
treec8f48559f50e5a137fcce6649e6afa8f5093251f /liboffloadmic
parent34a10ece8759712c038866759f74c57596557d36 (diff)
Merge liboffloadmic from upstream, version 20150803.
liboffloadmic/ * Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host and libmyo-client. liboffloadmic_host loads them dynamically. * Makefile.in: Regenerate. * doc/doxygen/header.tex: Merge from upstream, version 20150803 <https://openmprtl.org/sites/default/files/liboffload_oss_20150803.tgz>. * runtime/cean_util.cpp: Likewise. * runtime/cean_util.h: Likewise. * runtime/coi/coi_client.cpp: Likewise. * runtime/coi/coi_client.h: Likewise. * runtime/coi/coi_server.cpp: Likewise. * runtime/coi/coi_server.h: Likewise. * runtime/compiler_if_host.cpp: Likewise. * runtime/compiler_if_host.h: Likewise. * runtime/compiler_if_target.cpp: Likewise. * runtime/compiler_if_target.h: Likewise. * runtime/dv_util.cpp: Likewise. * runtime/dv_util.h: Likewise. * runtime/liboffload_error.c: Likewise. * runtime/liboffload_error_codes.h: Likewise. * runtime/liboffload_msg.c: Likewise. * runtime/liboffload_msg.h: Likewise. * runtime/mic_lib.f90: Likewise. * runtime/offload.h: Likewise. * runtime/offload_common.cpp: Likewise. * runtime/offload_common.h: Likewise. * runtime/offload_engine.cpp: Likewise. * runtime/offload_engine.h: Likewise. * runtime/offload_env.cpp: Likewise. * runtime/offload_env.h: Likewise. * runtime/offload_host.cpp: Likewise. * runtime/offload_host.h: Likewise. * runtime/offload_iterator.h: Likewise. * runtime/offload_myo_host.cpp: Likewise. * runtime/offload_myo_host.h: Likewise. * runtime/offload_myo_target.cpp: Likewise. * runtime/offload_myo_target.h: Likewise. * runtime/offload_omp_host.cpp: Likewise. * runtime/offload_omp_target.cpp: Likewise. * runtime/offload_orsl.cpp: Likewise. * runtime/offload_orsl.h: Likewise. * runtime/offload_table.cpp: Likewise. * runtime/offload_table.h: Likewise. * runtime/offload_target.cpp: Likewise. * runtime/offload_target.h: Likewise. * runtime/offload_target_main.cpp: Likewise. * runtime/offload_timer.h: Likewise. * runtime/offload_timer_host.cpp: Likewise. * runtime/offload_timer_target.cpp: Likewise. 
* runtime/offload_trace.cpp: Likewise. * runtime/offload_trace.h: Likewise. * runtime/offload_util.cpp: Likewise. * runtime/offload_util.h: Likewise. * runtime/ofldbegin.cpp: Likewise. * runtime/ofldend.cpp: Likewise. * runtime/orsl-lite/include/orsl-lite.h: Likewise. * runtime/orsl-lite/lib/orsl-lite.c: Likewise. * runtime/use_mpss2.txt: Likewise. * include/coi/common/COIEngine_common.h: Merge from upstream, MPSS version 3.5 <http://registrationcenter.intel.com/irc_nas/7445/mpss-src-3.5.tar>. * include/coi/common/COIEvent_common.h: Likewise. * include/coi/common/COIMacros_common.h: Likewise. * include/coi/common/COIPerf_common.h: Likewise. * include/coi/common/COIResult_common.h: Likewise. * include/coi/common/COISysInfo_common.h: Likewise. * include/coi/common/COITypes_common.h: Likewise. * include/coi/sink/COIBuffer_sink.h: Likewise. * include/coi/sink/COIPipeline_sink.h: Likewise. * include/coi/sink/COIProcess_sink.h: Likewise. * include/coi/source/COIBuffer_source.h: Likewise. * include/coi/source/COIEngine_source.h: Likewise. * include/coi/source/COIEvent_source.h: Likewise. * include/coi/source/COIPipeline_source.h: Likewise. * include/coi/source/COIProcess_source.h: Likewise. * include/myo/myo.h: Likewise. * include/myo/myoimpl.h: Likewise. * include/myo/myotypes.h: Likewise. * plugin/Makefile.am (myo_inc_dir): Remove. (libgomp_plugin_intelmic_la_CPPFLAGS): Do not define MYO_SUPPORT. (AM_CPPFLAGS): Likewise for offload_target_main. * plugin/Makefile.in: Regenerate. * runtime/emulator/coi_common.h: Update copyright years. (OFFLOAD_EMUL_KNC_NUM_ENV): Replace with ... (OFFLOAD_EMUL_NUM_ENV): ... this. (enum cmd_t): Add CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.cpp: Update copyright years. (COIProcessWaitForShutdown): Add space between string constants. Return handle to host in CMD_OPEN_LIBRARY. Support CMD_CLOSE_LIBRARY. * runtime/emulator/coi_device.h: Update copyright years. * runtime/emulator/coi_host.cpp: Update copyright years. 
(knc_engines_num): Replace with ... (num_engines): ... this. (init): Replace OFFLOAD_EMUL_KNC_NUM_ENV with OFFLOAD_EMUL_NUM_ENV. (COIEngineGetCount): Replace COI_ISA_KNC with COI_ISA_MIC, and knc_engines_num with num_engines. (COIEngineGetHandle): Likewise. (COIProcessCreateFromMemory): Add space between string constants. (COIProcessCreateFromFile): New function. (COIProcessLoadLibraryFromMemory): Rename arguments according to COIProcess_source.h. Return handle, received from target. (COIProcessUnloadLibrary): New function. (COIPipelineClearCPUMask): New function. (COIPipelineSetCPUMask): New function. (COIEngineGetInfo): New function. * runtime/emulator/coi_host.h: Update copyright years. * runtime/emulator/coi_version_asm.h: Regenerate. * runtime/emulator/coi_version_linker_script.map: Regenerate. * runtime/emulator/myo_client.cpp: Update copyright years. * runtime/emulator/myo_service.cpp: Update copyright years. (myoArenaRelease): New function. (myoArenaAcquire): New function. (myoArenaAlignedFree): New function. (myoArenaAlignedMalloc): New function. * runtime/emulator/myo_service.h: Update copyright years. * runtime/emulator/myo_version_asm.h: Regenerate. * runtime/emulator/myo_version_linker_script.map: Regenerate. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@227532 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'liboffloadmic')
-rw-r--r--liboffloadmic/ChangeLog124
-rw-r--r--liboffloadmic/Makefile.am2
-rw-r--r--liboffloadmic/Makefile.in3
-rw-r--r--liboffloadmic/doc/doxygen/header.tex2
-rw-r--r--liboffloadmic/include/coi/common/COIEngine_common.h6
-rw-r--r--liboffloadmic/include/coi/common/COIEvent_common.h84
-rw-r--r--liboffloadmic/include/coi/common/COIMacros_common.h153
-rw-r--r--liboffloadmic/include/coi/common/COIPerf_common.h2
-rw-r--r--liboffloadmic/include/coi/common/COIResult_common.h7
-rw-r--r--liboffloadmic/include/coi/common/COISysInfo_common.h126
-rw-r--r--liboffloadmic/include/coi/common/COITypes_common.h6
-rw-r--r--liboffloadmic/include/coi/sink/COIBuffer_sink.h53
-rw-r--r--liboffloadmic/include/coi/sink/COIPipeline_sink.h2
-rw-r--r--liboffloadmic/include/coi/sink/COIProcess_sink.h16
-rw-r--r--liboffloadmic/include/coi/source/COIBuffer_source.h830
-rw-r--r--liboffloadmic/include/coi/source/COIEngine_source.h31
-rw-r--r--liboffloadmic/include/coi/source/COIEvent_source.h127
-rw-r--r--liboffloadmic/include/coi/source/COIPipeline_source.h77
-rw-r--r--liboffloadmic/include/coi/source/COIProcess_source.h342
-rw-r--r--liboffloadmic/include/myo/myo.h2
-rw-r--r--liboffloadmic/include/myo/myoimpl.h109
-rw-r--r--liboffloadmic/include/myo/myotypes.h39
-rw-r--r--liboffloadmic/plugin/Makefile.am5
-rw-r--r--liboffloadmic/plugin/Makefile.in5
-rw-r--r--liboffloadmic/runtime/cean_util.cpp98
-rw-r--r--liboffloadmic/runtime/cean_util.h44
-rw-r--r--liboffloadmic/runtime/coi/coi_client.cpp126
-rw-r--r--liboffloadmic/runtime/coi/coi_client.h91
-rw-r--r--liboffloadmic/runtime/coi/coi_server.cpp32
-rw-r--r--liboffloadmic/runtime/coi/coi_server.h4
-rw-r--r--liboffloadmic/runtime/compiler_if_host.cpp170
-rw-r--r--liboffloadmic/runtime/compiler_if_host.h58
-rw-r--r--liboffloadmic/runtime/compiler_if_target.cpp2
-rw-r--r--liboffloadmic/runtime/compiler_if_target.h2
-rw-r--r--liboffloadmic/runtime/dv_util.cpp6
-rw-r--r--liboffloadmic/runtime/dv_util.h15
-rw-r--r--liboffloadmic/runtime/emulator/coi_common.h7
-rw-r--r--liboffloadmic/runtime/emulator/coi_device.cpp23
-rw-r--r--liboffloadmic/runtime/emulator/coi_device.h2
-rw-r--r--liboffloadmic/runtime/emulator/coi_host.cpp151
-rw-r--r--liboffloadmic/runtime/emulator/coi_host.h2
-rw-r--r--liboffloadmic/runtime/emulator/coi_version_asm.h81
-rw-r--r--liboffloadmic/runtime/emulator/coi_version_linker_script.map14
-rw-r--r--liboffloadmic/runtime/emulator/myo_client.cpp2
-rw-r--r--liboffloadmic/runtime/emulator/myo_service.cpp46
-rw-r--r--liboffloadmic/runtime/emulator/myo_service.h2
-rw-r--r--liboffloadmic/runtime/emulator/myo_version_asm.h32
-rw-r--r--liboffloadmic/runtime/emulator/myo_version_linker_script.map10
-rw-r--r--liboffloadmic/runtime/liboffload_error.c45
-rw-r--r--liboffloadmic/runtime/liboffload_error_codes.h45
-rw-r--r--liboffloadmic/runtime/liboffload_msg.c5
-rw-r--r--liboffloadmic/runtime/liboffload_msg.h352
-rw-r--r--liboffloadmic/runtime/mic_lib.f902
-rw-r--r--liboffloadmic/runtime/offload.h66
-rw-r--r--liboffloadmic/runtime/offload_common.cpp2
-rw-r--r--liboffloadmic/runtime/offload_common.h113
-rw-r--r--liboffloadmic/runtime/offload_engine.cpp428
-rw-r--r--liboffloadmic/runtime/offload_engine.h233
-rw-r--r--liboffloadmic/runtime/offload_env.cpp6
-rw-r--r--liboffloadmic/runtime/offload_env.h5
-rw-r--r--liboffloadmic/runtime/offload_host.cpp2246
-rw-r--r--liboffloadmic/runtime/offload_host.h161
-rw-r--r--liboffloadmic/runtime/offload_iterator.h103
-rw-r--r--liboffloadmic/runtime/offload_myo_host.cpp561
-rw-r--r--liboffloadmic/runtime/offload_myo_host.h78
-rw-r--r--liboffloadmic/runtime/offload_myo_target.cpp115
-rw-r--r--liboffloadmic/runtime/offload_myo_target.h52
-rw-r--r--liboffloadmic/runtime/offload_omp_host.cpp2
-rw-r--r--liboffloadmic/runtime/offload_omp_target.cpp40
-rw-r--r--liboffloadmic/runtime/offload_orsl.cpp4
-rw-r--r--liboffloadmic/runtime/offload_orsl.h12
-rw-r--r--liboffloadmic/runtime/offload_table.cpp220
-rw-r--r--liboffloadmic/runtime/offload_table.h284
-rw-r--r--liboffloadmic/runtime/offload_target.cpp20
-rw-r--r--liboffloadmic/runtime/offload_target.h10
-rw-r--r--liboffloadmic/runtime/offload_target_main.cpp2
-rw-r--r--liboffloadmic/runtime/offload_timer.h30
-rw-r--r--liboffloadmic/runtime/offload_timer_host.cpp2
-rw-r--r--liboffloadmic/runtime/offload_timer_target.cpp2
-rw-r--r--liboffloadmic/runtime/offload_trace.cpp104
-rw-r--r--liboffloadmic/runtime/offload_trace.h20
-rw-r--r--liboffloadmic/runtime/offload_util.cpp2
-rw-r--r--liboffloadmic/runtime/offload_util.h23
-rw-r--r--liboffloadmic/runtime/ofldbegin.cpp178
-rw-r--r--liboffloadmic/runtime/ofldend.cpp33
-rw-r--r--liboffloadmic/runtime/orsl-lite/include/orsl-lite.h2
-rw-r--r--liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c2
-rw-r--r--liboffloadmic/runtime/use_mpss2.txt2
88 files changed, 7246 insertions, 1539 deletions
diff --git a/liboffloadmic/ChangeLog b/liboffloadmic/ChangeLog
index 67a3f6aee5d2..4a726c8636d8 100644
--- a/liboffloadmic/ChangeLog
+++ b/liboffloadmic/ChangeLog
@@ -1,3 +1,123 @@
+2015-09-08 Ilya Verbin <ilya.verbin@intel.com>
+
+ * Makefile.am (liboffloadmic_host_la_DEPENDENCIES): Remove libcoi_host
+ and libmyo-client. liboffloadmic_host loads them dynamically.
+ * Makefile.in: Regenerate.
+ * doc/doxygen/header.tex: Merge from upstream, version 20150803
+ <https://openmprtl.org/sites/default/files/liboffload_oss_20150803.tgz>.
+ * runtime/cean_util.cpp: Likewise.
+ * runtime/cean_util.h: Likewise.
+ * runtime/coi/coi_client.cpp: Likewise.
+ * runtime/coi/coi_client.h: Likewise.
+ * runtime/coi/coi_server.cpp: Likewise.
+ * runtime/coi/coi_server.h: Likewise.
+ * runtime/compiler_if_host.cpp: Likewise.
+ * runtime/compiler_if_host.h: Likewise.
+ * runtime/compiler_if_target.cpp: Likewise.
+ * runtime/compiler_if_target.h: Likewise.
+ * runtime/dv_util.cpp: Likewise.
+ * runtime/dv_util.h: Likewise.
+ * runtime/liboffload_error.c: Likewise.
+ * runtime/liboffload_error_codes.h: Likewise.
+ * runtime/liboffload_msg.c: Likewise.
+ * runtime/liboffload_msg.h: Likewise.
+ * runtime/mic_lib.f90: Likewise.
+ * runtime/offload.h: Likewise.
+ * runtime/offload_common.cpp: Likewise.
+ * runtime/offload_common.h: Likewise.
+ * runtime/offload_engine.cpp: Likewise.
+ * runtime/offload_engine.h: Likewise.
+ * runtime/offload_env.cpp: Likewise.
+ * runtime/offload_env.h: Likewise.
+ * runtime/offload_host.cpp: Likewise.
+ * runtime/offload_host.h: Likewise.
+ * runtime/offload_iterator.h: Likewise.
+ * runtime/offload_myo_host.cpp: Likewise.
+ * runtime/offload_myo_host.h: Likewise.
+ * runtime/offload_myo_target.cpp: Likewise.
+ * runtime/offload_myo_target.h: Likewise.
+ * runtime/offload_omp_host.cpp: Likewise.
+ * runtime/offload_omp_target.cpp: Likewise.
+ * runtime/offload_orsl.cpp: Likewise.
+ * runtime/offload_orsl.h: Likewise.
+ * runtime/offload_table.cpp: Likewise.
+ * runtime/offload_table.h: Likewise.
+ * runtime/offload_target.cpp: Likewise.
+ * runtime/offload_target.h: Likewise.
+ * runtime/offload_target_main.cpp: Likewise.
+ * runtime/offload_timer.h: Likewise.
+ * runtime/offload_timer_host.cpp: Likewise.
+ * runtime/offload_timer_target.cpp: Likewise.
+ * runtime/offload_trace.cpp: Likewise.
+ * runtime/offload_trace.h: Likewise.
+ * runtime/offload_util.cpp: Likewise.
+ * runtime/offload_util.h: Likewise.
+ * runtime/ofldbegin.cpp: Likewise.
+ * runtime/ofldend.cpp: Likewise.
+ * runtime/orsl-lite/include/orsl-lite.h: Likewise.
+ * runtime/orsl-lite/lib/orsl-lite.c: Likewise.
+ * runtime/use_mpss2.txt: Likewise.
+ * include/coi/common/COIEngine_common.h: Merge from upstream, MPSS
+ version 3.5
+ <http://registrationcenter.intel.com/irc_nas/7445/mpss-src-3.5.tar>.
+ * include/coi/common/COIEvent_common.h: Likewise.
+ * include/coi/common/COIMacros_common.h: Likewise.
+ * include/coi/common/COIPerf_common.h: Likewise.
+ * include/coi/common/COIResult_common.h: Likewise.
+ * include/coi/common/COISysInfo_common.h: Likewise.
+ * include/coi/common/COITypes_common.h: Likewise.
+ * include/coi/sink/COIBuffer_sink.h: Likewise.
+ * include/coi/sink/COIPipeline_sink.h: Likewise.
+ * include/coi/sink/COIProcess_sink.h: Likewise.
+ * include/coi/source/COIBuffer_source.h: Likewise.
+ * include/coi/source/COIEngine_source.h: Likewise.
+ * include/coi/source/COIEvent_source.h: Likewise.
+ * include/coi/source/COIPipeline_source.h: Likewise.
+ * include/coi/source/COIProcess_source.h: Likewise.
+ * include/myo/myo.h: Likewise.
+ * include/myo/myoimpl.h: Likewise.
+ * include/myo/myotypes.h: Likewise.
+ * plugin/Makefile.am (myo_inc_dir): Remove.
+ (libgomp_plugin_intelmic_la_CPPFLAGS): Do not define MYO_SUPPORT.
+ (AM_CPPFLAGS): Likewise for offload_target_main.
+ * plugin/Makefile.in: Regenerate.
+ * runtime/emulator/coi_common.h: Update copyright years.
+ (OFFLOAD_EMUL_KNC_NUM_ENV): Replace with ...
+ (OFFLOAD_EMUL_NUM_ENV): ... this.
+ (enum cmd_t): Add CMD_CLOSE_LIBRARY.
+ * runtime/emulator/coi_device.cpp: Update copyright years.
+ (COIProcessWaitForShutdown): Add space between string constants.
+ Return handle to host in CMD_OPEN_LIBRARY.
+ Support CMD_CLOSE_LIBRARY.
+ * runtime/emulator/coi_device.h: Update copyright years.
+ * runtime/emulator/coi_host.cpp: Update copyright years.
+ (knc_engines_num): Replace with ...
+ (num_engines): ... this.
+ (init): Replace OFFLOAD_EMUL_KNC_NUM_ENV with OFFLOAD_EMUL_NUM_ENV.
+ (COIEngineGetCount): Replace COI_ISA_KNC with COI_ISA_MIC, and
+ knc_engines_num with num_engines.
+ (COIEngineGetHandle): Likewise.
+ (COIProcessCreateFromMemory): Add space between string constants.
+ (COIProcessCreateFromFile): New function.
+ (COIProcessLoadLibraryFromMemory): Rename arguments according to
+ COIProcess_source.h. Return handle, received from target.
+ (COIProcessUnloadLibrary): New function.
+ (COIPipelineClearCPUMask): New function.
+ (COIPipelineSetCPUMask): New function.
+ (COIEngineGetInfo): New function.
+ * runtime/emulator/coi_host.h: Update copyright years.
+ * runtime/emulator/coi_version_asm.h: Regenerate.
+ * runtime/emulator/coi_version_linker_script.map: Regenerate.
+ * runtime/emulator/myo_client.cpp: Update copyright years.
+ * runtime/emulator/myo_service.cpp: Update copyright years.
+ (myoArenaRelease): New function.
+ (myoArenaAcquire): New function.
+ (myoArenaAlignedFree): New function.
+ (myoArenaAlignedMalloc): New function.
+ * runtime/emulator/myo_service.h: Update copyright years.
+ * runtime/emulator/myo_version_asm.h: Regenerate.
+ * runtime/emulator/myo_version_linker_script.map: Regenerate.
+
2015-08-24 Nathan Sidwell <nathan@codesourcery.com>
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_version): New.
@@ -17,11 +137,11 @@
* configure: Reflects renaming of configure.in to configure.ac
2015-07-17 Nathan Sidwell <nathan@acm.org>
- Ilya Verbin <iverbin@gmail.com>
+ Ilya Verbin <ilya.verbin@intel.com>
* plugin/libgomp-plugin-intelmic.cpp (ImgDevAddrMap): Constify.
(offload_image, GOMP_OFFLOAD_load_image,
- OMP_OFFLOAD_unload_image): Constify target data.
+ GOMP_OFFLOAD_unload_image): Constify target data.
2015-07-08 Thomas Schwinge <thomas@codesourcery.com>
diff --git a/liboffloadmic/Makefile.am b/liboffloadmic/Makefile.am
index adc7c4cf55a6..b1454513a098 100644
--- a/liboffloadmic/Makefile.am
+++ b/liboffloadmic/Makefile.am
@@ -84,8 +84,6 @@ liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
liboffloadmic_host_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=1
liboffloadmic_host_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
-liboffloadmic_host_la_LIBADD = libcoi_host.la libmyo-client.la
-liboffloadmic_host_la_DEPENDENCIES = $(liboffloadmic_host_la_LIBADD)
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
runtime/coi/coi_server.cpp \
diff --git a/liboffloadmic/Makefile.in b/liboffloadmic/Makefile.in
index 22b0193e13f8..74fb3d27180a 100644
--- a/liboffloadmic/Makefile.in
+++ b/liboffloadmic/Makefile.in
@@ -165,6 +165,7 @@ libmyo_service_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(CXXFLAGS) $(libmyo_service_la_LDFLAGS) $(LDFLAGS) -o $@
@LIBOFFLOADMIC_HOST_FALSE@am_libmyo_service_la_rpath = -rpath \
@LIBOFFLOADMIC_HOST_FALSE@ $(toolexeclibdir)
+liboffloadmic_host_la_LIBADD =
am__objects_1 = liboffloadmic_host_la-dv_util.lo \
liboffloadmic_host_la-liboffload_error.lo \
liboffloadmic_host_la-liboffload_msg.lo \
@@ -445,8 +446,6 @@ liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
liboffloadmic_host_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=1
liboffloadmic_host_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
-liboffloadmic_host_la_LIBADD = libcoi_host.la libmyo-client.la
-liboffloadmic_host_la_DEPENDENCIES = $(liboffloadmic_host_la_LIBADD)
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
runtime/coi/coi_server.cpp \
runtime/compiler_if_target.cpp \
diff --git a/liboffloadmic/doc/doxygen/header.tex b/liboffloadmic/doc/doxygen/header.tex
index b64a4636b9a9..eaa563ca29be 100644
--- a/liboffloadmic/doc/doxygen/header.tex
+++ b/liboffloadmic/doc/doxygen/header.tex
@@ -82,7 +82,7 @@ Notice revision \#20110804
Intel, Xeon, and Intel Xeon Phi are trademarks of Intel Corporation in the U.S. and/or other countries.
-This document is Copyright \textcopyright 2014, Intel Corporation. All rights reserved.
+This document is Copyright \textcopyright 2014-2015, Intel Corporation. All rights reserved.
\pagenumbering{roman}
\tableofcontents
diff --git a/liboffloadmic/include/coi/common/COIEngine_common.h b/liboffloadmic/include/coi/common/COIEngine_common.h
index 87123128cf9d..b8f31669b89f 100644
--- a/liboffloadmic/include/coi/common/COIEngine_common.h
+++ b/liboffloadmic/include/coi/common/COIEngine_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -64,7 +64,7 @@ extern "C" {
///////////////////////////////////////////////////////////////////////////////
///
-/// List of ISA types of supported engines.
+/// List of ISA types of supported engines.
///
typedef enum
{
@@ -89,7 +89,7 @@ typedef enum
/// [out] The zero-based index of this engine in the collection of
/// engines of the ISA returned in out_pType.
///
-/// @return COI_INVALID_POINTER if the any of the parameters are NULL.
+/// @return COI_INVALID_POINTER if any of the parameters are NULL.
///
/// @return COI_SUCCESS
///
diff --git a/liboffloadmic/include/coi/common/COIEvent_common.h b/liboffloadmic/include/coi/common/COIEvent_common.h
new file mode 100644
index 000000000000..21c93138a0cb
--- /dev/null
+++ b/liboffloadmic/include/coi/common/COIEvent_common.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2010-2015 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+#ifndef _COIEVENT_COMMON_H
+#define _COIEVENT_COMMON_H
+
+/** @ingroup COIEvent
+ * @addtogroup COIEventcommon
+@{
+* @file common/COIEvent_common.h
+*/
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+#include "../common/COITypes_common.h"
+#include "../common/COIResult_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#endif // DOXYGEN_SHOULD_SKIP_THIS
+
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// Signal one shot user event. User events created on source can be
+/// signaled from both sink and source. This fires the event and wakes up
+/// threads waiting on COIEventWait.
+///
+/// Note: For events that are not registered or already signaled this call
+/// will behave as a NOP. Users need to make sure that they pass valid
+/// events on the sink side.
+///
+/// @param in_Event
+/// Event Handle to be signaled.
+///
+/// @return COI_INVALID_HANDLE if in_Event was not a User event.
+///
+/// @return COI_ERROR if the signal fails to be sent from the sink.
+///
+/// @return COI_SUCCESS if the event was successfully signaled or ignored.
+///
+COIACCESSAPI
+COIRESULT COIEventSignalUserEvent(COIEVENT in_Event);
+///
+///
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _COIEVENT_COMMON_H */
+
+/*! @} */
diff --git a/liboffloadmic/include/coi/common/COIMacros_common.h b/liboffloadmic/include/coi/common/COIMacros_common.h
index 6abddfedaf3d..07c9b8cb356b 100644
--- a/liboffloadmic/include/coi/common/COIMacros_common.h
+++ b/liboffloadmic/include/coi/common/COIMacros_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -41,12 +41,17 @@
#ifndef _COIMACROS_COMMON_H
#define _COIMACROS_COMMON_H
+#include <string.h>
+#include "../source/COIPipeline_source.h"
+#include "../common/COITypes_common.h"
+
/// @file common/COIMacros_common.h
/// Commonly used macros
// Note that UNUSED_ATTR means that it is "possibly" unused, not "definitely".
// This should compile out in release mode if indeed it is unused.
#define UNUSED_ATTR __attribute__((unused))
+ #include <sched.h>
#ifndef UNREFERENCED_CONST_PARAM
#define UNREFERENCED_CONST_PARAM(P) { void* x UNUSED_ATTR = \
(void*)(uint64_t)P; \
@@ -66,4 +71,150 @@
#endif
+/* The following are static inline definitions of functions used for manipulating
+ COI_CPU_MASK info (The COI_CPU_MASK type is declared as an array of 16 uint64_t's
+ in COITypes_common.h "typedef uint64_t COI_CPU_MASK[16]").
+
+ These static inline functions are intended to be roughly the same as the Linux
+ CPU_* macros defined in sched.h - with the important difference being the
+ fundamental type they operate on: cpu_set_t versus COI_CPU_MASK.
+
+ The motivation for writing this code was to ease portability on the host side of COI
+ applications to both Windows and Linux.
+*/
+
+/* Roughly equivalent to CPU_ISSET(). */
+static inline uint64_t COI_CPU_MASK_ISSET(int bitNumber, const COI_CPU_MASK cpu_mask)
+{
+ if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
+ return ((cpu_mask)[bitNumber/64] & (((uint64_t)1) << (bitNumber%64)));
+ return 0;
+}
+
+/* Roughly equivalent to CPU_SET(). */
+static inline void COI_CPU_MASK_SET(int bitNumber, COI_CPU_MASK cpu_mask)
+{
+ if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
+ ((cpu_mask)[bitNumber/64] |= (((uint64_t)1) << (bitNumber%64)));
+}
+
+/* Roughly equivalent to CPU_ZERO(). */
+static inline void COI_CPU_MASK_ZERO(COI_CPU_MASK cpu_mask)
+{
+ memset(cpu_mask,0,sizeof(COI_CPU_MASK));
+}
+
+/* Roughly equivalent to CPU_AND(). */
+static inline void COI_CPU_MASK_AND(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
+{
+ const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+
+ for(unsigned int i=0;i<loopIterations;++i)
+ dst[i] = src1[i] & src2[i];
+}
+
+/* Roughly equivalent to CPU_XOR(). */
+static inline void COI_CPU_MASK_XOR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
+{
+ const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+
+ for(unsigned int i=0;i<loopIterations;++i)
+ dst[i] = src1[i] ^ src2[i];
+}
+
+/* Roughly equivalent to CPU_OR(). */
+static inline void COI_CPU_MASK_OR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
+{
+ const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+
+ for(unsigned int i=0;i<loopIterations;++i)
+ dst[i] = src1[i] | src2[i];
+}
+
+/* Utility function for COI_CPU_MASK_COUNT() below. */
+static inline int __COI_CountBits(uint64_t n)
+{
+ int cnt=0;
+
+ for (;n;cnt++)
+ n &= (n-1);
+ return cnt;
+}
+
+/* Roughly equivalent to CPU_COUNT(). */
+static inline int COI_CPU_MASK_COUNT(const COI_CPU_MASK cpu_mask)
+{
+ int cnt=0;
+ const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask[0]);
+
+ for(unsigned int i=0;i < loopIterations;++i)
+ {
+ cnt += __COI_CountBits(cpu_mask[i]);
+ }
+ return cnt;
+}
+
+/* Roughly equivalent to CPU_EQUAL(). */
+static inline int COI_CPU_MASK_EQUAL(const COI_CPU_MASK cpu_mask1,const COI_CPU_MASK cpu_mask2)
+{
+ const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask1[0]);
+
+ for(unsigned int i=0;i < loopIterations;++i)
+ {
+ if (cpu_mask1[i] != cpu_mask2[i])
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Utility function to translate from cpu_set_t * to COI_CPU_MASK. */
+static inline void COI_CPU_MASK_XLATE(COI_CPU_MASK dest,const cpu_set_t *src)
+{
+ COI_CPU_MASK_ZERO(dest);
+#if 0
+ /* Slightly slower version than the following #else/#endif block. Left here only to
+ document the intent of the code. */
+ for(unsigned int i=0;i < sizeof(cpu_set_t)*8;++i)
+ if (CPU_ISSET(i,src))
+ COI_CPU_MASK_SET(i,dest);
+#else
+ for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(dest[0]);++i)
+ {
+ for(unsigned int j=0;j < 64;++j)
+ {
+ if (CPU_ISSET(i*64+j,src))
+ dest[i] |= ((uint64_t)1) << j;
+ }
+ }
+#endif
+}
+
+/* Utility function to translate from COI_CPU_MASK to cpu_set_t *. */
+static inline void COI_CPU_MASK_XLATE_EX(cpu_set_t *dest,const COI_CPU_MASK src)
+{
+ CPU_ZERO(dest);
+#if 0
+ /* Slightly slower version than the following #else/#endif block. Left here only to
+ document the intent of the code. */
+ for(unsigned int i=0;i < sizeof(COI_CPU_MASK)*8;++i)
+ if (COI_CPU_MASK_ISSET(i,src))
+ CPU_SET(i,dest);
+#else
+ for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(src[0]);++i)
+ {
+ const uint64_t cpu_mask = src[i];
+
+ for(unsigned int j=0;j < 64;++j)
+ {
+ const uint64_t bit = ((uint64_t)1) << j;
+
+ if (bit & cpu_mask)
+ CPU_SET(i*64+j,dest);
+ }
+ }
+#endif
+}
+
+
#endif /* _COIMACROS_COMMON_H */
diff --git a/liboffloadmic/include/coi/common/COIPerf_common.h b/liboffloadmic/include/coi/common/COIPerf_common.h
index b81756f1cc1e..f542786452f0 100644
--- a/liboffloadmic/include/coi/common/COIPerf_common.h
+++ b/liboffloadmic/include/coi/common/COIPerf_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
diff --git a/liboffloadmic/include/coi/common/COIResult_common.h b/liboffloadmic/include/coi/common/COIResult_common.h
index df8a4f681499..947e93cbdc29 100644
--- a/liboffloadmic/include/coi/common/COIResult_common.h
+++ b/liboffloadmic/include/coi/common/COIResult_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -110,12 +110,13 @@ typedef enum COIRESULT
///< Offload Infrastructure on the host
///< is not compatible with the version
///< on the device.
- COI_BAD_PORT, ///< The port that the host is set to
+ COI_BAD_PORT, ///< The port that the host is set to
///< connect to is invalid.
COI_AUTHENTICATION_FAILURE, ///< The daemon was unable to authenticate
///< the user that requested an engine.
///< Only reported if daemon is set up for
- ///< authorization.
+ ///< authorization. Is also reported in
+ ///< Windows if host can not find user.
COI_NUM_RESULTS ///< Reserved, do not use.
}
COIRESULT;
diff --git a/liboffloadmic/include/coi/common/COISysInfo_common.h b/liboffloadmic/include/coi/common/COISysInfo_common.h
new file mode 100644
index 000000000000..0fae2312f2d6
--- /dev/null
+++ b/liboffloadmic/include/coi/common/COISysInfo_common.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2010-2015 Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, version 2.1.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * Disclaimer: The codes contained in these modules may be specific
+ * to the Intel Software Development Platform codenamed Knights Ferry,
+ * and the Intel product codenamed Knights Corner, and are not backward
+ * compatible with other Intel products. Additionally, Intel will NOT
+ * support the codes or instruction set in future products.
+ *
+ * Intel offers no warranty of any kind regarding the code. This code is
+ * licensed on an "AS IS" basis and Intel is not obligated to provide
+ * any support, assistance, installation, training, or other services
+ * of any kind. Intel is also not obligated to provide any updates,
+ * enhancements or extensions. Intel specifically disclaims any warranty
+ * of merchantability, non-infringement, fitness for any particular
+ * purpose, and any other warranty.
+ *
+ * Further, Intel disclaims all liability of any kind, including but
+ * not limited to liability for infringement of any proprietary rights,
+ * relating to the use of the code, even if Intel is notified of the
+ * possibility of such liability. Except as expressly stated in an Intel
+ * license agreement provided with this code and agreed upon with Intel,
+ * no license, express or implied, by estoppel or otherwise, to any
+ * intellectual property rights is granted herein.
+ */
+
+#ifndef _COISYSINFO_COMMON_H
+#define _COISYSINFO_COMMON_H
+
+/** @ingroup COISysInfo
+ * @addtogroup COISysInfoCommon
+@{
+* @file common/COISysInfo_common.h
+* This interface allows developers to query the platform for system level
+* information. */
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+#include "../common/COITypes_common.h"
+#include <assert.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#endif // DOXYGEN_SHOULD_SKIP_THIS
+
+#define INITIAL_APIC_ID_BITS 0xFF000000 // EBX[31:24] unique APIC ID
+
+///////////////////////////////////////////////////////////////////////////////
+/// \fn uint32_t COISysGetAPICID(void)
+/// @return The Advanced Programmable Interrupt Controller (APIC) ID of
+/// the hardware thread on which the caller is running.
+///
+/// @warning APIC IDs are unique to each hardware thread within a processor,
+/// but may not be sequential.
+COIACCESSAPI
+uint32_t COISysGetAPICID(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The number of cores exposed by the processor on which the caller is
+/// running. Returns 0 if there is an error loading the processor info.
+COIACCESSAPI
+uint32_t COISysGetCoreCount(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The number of hardware threads exposed by the processor on which
+/// the caller is running. Returns 0 if there is an error loading processor
+/// info.
+COIACCESSAPI
+uint32_t COISysGetHardwareThreadCount(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The index of the hardware thread on which the caller is running.
+///
+/// The indexes of neighboring hardware threads will differ by a value of one
+/// and are within the range zero through COISysGetHardwareThreadCount()-1.
+/// Returns ((uint32_t)-1) if there was an error loading processor info.
+COIACCESSAPI
+uint32_t COISysGetHardwareThreadIndex(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The index of the core on which the caller is running.
+///
+/// The indexes of neighboring cores will differ by a value of one and are
+/// within the range zero through COISysGetCoreCount()-1. Returns ((uint32_t)-1)
+/// if there was an error loading processor info.
+COIACCESSAPI
+uint32_t COISysGetCoreIndex(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The number of level 2 caches within the processor on which the
+/// caller is running. Returns ((uint32_t)-1) if there was an error loading
+/// processor info.
+COIACCESSAPI
+uint32_t COISysGetL2CacheCount(void);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// @return The index of the level 2 cache on which the caller is running.
+/// Returns ((uint32_t)-1) if there was an error loading processor info.
+///
+/// The indexes of neighboring cores will differ by a value of one and are
+/// within the range zero through COISysGetL2CacheCount()-1.
+COIACCESSAPI
+uint32_t COISysGetL2CacheIndex(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+/*! @} */
+
+#endif /* _COISYSINFO_COMMON_H */
diff --git a/liboffloadmic/include/coi/common/COITypes_common.h b/liboffloadmic/include/coi/common/COITypes_common.h
index 78180dce81b4..001ddc979c08 100644
--- a/liboffloadmic/include/coi/common/COITypes_common.h
+++ b/liboffloadmic/include/coi/common/COITypes_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -73,8 +73,8 @@ typedef struct coimapinst * COIMAPINSTANCE;
typedef uint64_t COI_CPU_MASK[16];
-/**
- * On Windows, coi_wchar_t is a uint32_t. On Windows, wchar_t is 16 bits wide, and on Linux it is 32 bits wide, so uint32_t is used for portability.
+/**
+ * On Windows, coi_wchar_t is a uint32_t. On Windows, wchar_t is 16 bits wide, and on Linux it is 32 bits wide, so uint32_t is used for portability.
*/
typedef wchar_t coi_wchar_t;
diff --git a/liboffloadmic/include/coi/sink/COIBuffer_sink.h b/liboffloadmic/include/coi/sink/COIBuffer_sink.h
index 66d0549010d8..2e51e92f920a 100644
--- a/liboffloadmic/include/coi/sink/COIBuffer_sink.h
+++ b/liboffloadmic/include/coi/sink/COIBuffer_sink.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -45,7 +45,7 @@
* @addtogroup COIBufferSink
@{
-* @file sink\COIBuffer_sink.h
+* @file sink\COIBuffer_sink.h
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#include "../common/COITypes_common.h"
@@ -54,29 +54,29 @@
#ifdef __cplusplus
extern "C" {
-#endif
+#endif
//////////////////////////////////////////////////////////////////////////////
///
-/// Adds a reference to the memory of a buffer. The memory of the buffer
-/// will remain on the device until both a corresponding COIBufferReleaseRef()
+/// Adds a reference to the memory of a buffer. The memory of the buffer
+/// will remain on the device until both a corresponding COIBufferReleaseRef()
/// call is made and the run function that delivered the buffer returns.
///
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) streaming buffers should not be AddRef'd. Doing so may result in
-/// unpredictable results or may cause the sink process to crash.
+/// Running this API in a thread spawned within the run function is not
+/// supported and will cause unpredictable results and may cause data corruption.
///
-/// @warning 1.It is possible for enqueued run functions to be unable to
-/// execute due to all card memory being occupied by addref'ed
+/// @warning 1.It is possible for enqueued run functions to be unable to
+/// execute due to all card memory being occupied by AddRef'd
/// buffers. As such, it is important that whenever a buffer is
-/// addref'd that there be no dependencies on future run functions
+/// AddRef'd that there be no dependencies on future run functions
/// for progress to be made towards releasing the buffer.
-/// 2.It is important that AddRef is called within the scope of
-/// run function that carries the buffer to be addref'ed.
+/// 2.It is important that AddRef is called within the scope of
+/// run function that carries the buffer to be AddRef'd.
///
/// @param in_pBuffer
-/// [in] Pointer to the start of a buffer being addref'ed, that was
+/// [in] Pointer to the start of a buffer being AddRef'd, that was
/// passed in at the start of the run function.
-///
+///
/// @return COI_SUCCESS if the buffer ref count was successfully incremented.
///
/// @return COI_INVALID_POINTER if the buffer pointer is NULL.
@@ -90,30 +90,33 @@ COIBufferAddRef(
//////////////////////////////////////////////////////////////////////////////
///
-/// Removes a reference to the memory of a buffer. The memory of the buffer
+/// Removes a reference to the memory of a buffer. The memory of the buffer
/// will be eligible for being freed on the device when the following
/// conditions are met: the run function that delivered the buffer
-/// returns, and the number of calls to COIBufferReleaseRef() matches the
+/// returns, and the number of calls to COIBufferReleaseRef() matches the
/// number of calls to COIBufferAddRef().
+///
+/// Running this API in a thread spawned within the run function is not
+/// supported and will cause unpredictable results and may cause data corruption.
///
-/// @warning When a buffer is addref'ed it is assumed that it is in use and all
+/// @warning When a buffer is AddRef'd it is assumed that it is in use and all
/// other operations on that buffer waits for ReleaseRef() to happen.
-/// So you cannot pass the addref'ed buffer's handle to RunFunction
-/// that calls ReleaseRef(). This is a circular dependency and will
-/// cause a deadlock. Buffer's pointer (buffer's sink side
+/// So you cannot pass the AddRef'd buffer's handle to RunFunction
+/// that calls ReleaseRef(). This is a circular dependency and will
+/// cause a deadlock. Buffer's pointer (buffer's sink side
/// address/pointer which is different than source side BUFFER handle)
-/// needs to be stored somewhere to retrieve it later to use in
+/// needs to be stored somewhere to retrieve it later to use in
/// ReleaseRef.
///
/// @param in_pBuffer
-/// [in] Pointer to the start of a buffer previously addref'ed, that
+/// [in] Pointer to the start of a buffer previously AddRef'd, that
/// was passed in at the start of the run function.
-///
+///
/// @return COI_SUCCESS if the buffer refcount was successfully decremented.
///
/// @return COI_INVALID_POINTER if the buffer pointer was invalid.
///
-/// @return COI_INVALID_HANDLE if the buffer did not have COIBufferAddRef()
+/// @return COI_INVALID_HANDLE if the buffer did not have COIBufferAddRef()
/// previously called on it.
///
COIRESULT
@@ -123,7 +126,7 @@ COIBufferReleaseRef(
#ifdef __cplusplus
} /* extern "C" */
-#endif
+#endif
#endif /* _COIBUFFER_SINK_H */
diff --git a/liboffloadmic/include/coi/sink/COIPipeline_sink.h b/liboffloadmic/include/coi/sink/COIPipeline_sink.h
index ccfde205dea0..c70872ad12c9 100644
--- a/liboffloadmic/include/coi/sink/COIPipeline_sink.h
+++ b/liboffloadmic/include/coi/sink/COIPipeline_sink.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
diff --git a/liboffloadmic/include/coi/sink/COIProcess_sink.h b/liboffloadmic/include/coi/sink/COIProcess_sink.h
index 90603262c8b6..6e2ef19df187 100644
--- a/liboffloadmic/include/coi/sink/COIProcess_sink.h
+++ b/liboffloadmic/include/coi/sink/COIProcess_sink.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -63,10 +63,11 @@ extern "C" {
/// main() function from exiting until it is directed to by the source. When
/// the shutdown message is received this function will stop any future run
/// functions from executing but will wait for any current run functions to
-/// complete. All Intel® Coprocessor Offload Infrastructure (Intel® COI) resources will be cleaned up and no additional Intel® Coprocessor Offload Infrastructure (Intel® COI) APIs
-/// should be called after this function returns. This function does not
-/// invoke exit() so the application can perform any of its own cleanup once
-/// this call returns.
+/// complete. All Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// resources will be cleaned up and no additional Intel® Coprocessor Offload
+/// Infrastructure (Intel® COI) APIs should be called after this function
+/// returns. This function does not invoke exit() so the application
+/// can perform any of its own cleanup once this call returns.
///
/// @return COI_SUCCESS once the process receives the shutdown message.
///
@@ -86,8 +87,9 @@ COIProcessWaitForShutdown();
/// from this call.
///
/// @return COI_SUCCESS once the proxy output has been flushed to and written
-/// written by the host. Note that Intel® Coprocessor Offload Infrastructure (Intel® COI) on the source writes to stdout
-/// and stderr, but does not flush this output.
+/// by the host. Note that Intel® Coprocessor Offload
+/// Infrastructure (Intel® COI) on the source writes to stdout and
+/// stderr, but does not flush this output.
/// @return COI_SUCCESS if the process was created without enabling
/// proxy IO this function.
///
diff --git a/liboffloadmic/include/coi/source/COIBuffer_source.h b/liboffloadmic/include/coi/source/COIBuffer_source.h
index 4a08856f5d16..bfd066ca916a 100644
--- a/liboffloadmic/include/coi/source/COIBuffer_source.h
+++ b/liboffloadmic/include/coi/source/COIBuffer_source.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -68,21 +68,16 @@ typedef enum COI_BUFFER_TYPE
/// Sink physical memory. Mapping the buffer may stall the pipelines.
COI_BUFFER_NORMAL = 1,
- /// A streaming buffer creates new versions each time it is passed to
- /// Runfunction. These new versions are consumed by run functions.
-
- /// To_SINK buffers are used to send data from SOURCE to SINK
- /// These buffers are SOURCE write only buffers. If read, won't
- /// get Data written by SINK
- COI_BUFFER_STREAMING_TO_SINK,
-
- /// To_SOURCE buffers are used to get data from SINK to SOURCE
- /// These buffers are SOURCE Read only buffers. If written, data
- /// won't get reflected on SINK side.
- COI_BUFFER_STREAMING_TO_SOURCE,
+ // Reserved values, not used by COI any more
+ COI_BUFFER_RESERVED_1,
+ COI_BUFFER_RESERVED_2,
/// A pinned buffer exists in a shared memory region and is always
/// available for read or write operations.
+ /// Note: Pinned Buffers larger than 4KB are not supported in
+ /// Windows 7 kernels.
+ /// The value of COI_BUFFER_PINNED is set to a specific value
+ /// to maintain compatibility with older versions of COI
COI_BUFFER_PINNED,
/// OpenCL buffers are similar to Normal buffers except they don't
@@ -126,12 +121,15 @@ typedef enum COI_BUFFER_TYPE
/// check to see if this memory is read only. Ordinarily this is checked
/// and an error is thrown upon buffer creation. With this flag, the error
/// might occur later, and cause undetermined behavior. Be sure to always
-/// use writeable memory for COIBuffers.
+/// use writable memory for COIBuffers.
#define COI_OPTIMIZE_NO_DMA 0x00000040
/// Hint to the runtime to try to use huge page sizes for backing store on the
-/// sink. Is currently not compatible with PINNED buffers or the SAME_ADDRESS
-/// flags or the SINK_MEMORY flag.
+/// sink. Is currently not compatible with PINNED buffers or the SAME_ADDRESS
+/// flags or the SINK_MEMORY flag. It is important to note that this is a hint
+/// and internally the runtime may not actually promote to huge pages.
+/// Specifically if the buffer is too small (less than 4KiB for example) then
+/// the runtime will not promote the buffer to use huge pages.
#define COI_OPTIMIZE_HUGE_PAGE_SIZE 0x00000080
/// Used to tell Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
@@ -167,12 +165,12 @@ COI_VALID_BUFFER_TYPES_AND_FLAGS[COI_BUFFER_OPENCL+1] = {
| ADDR | SINK | SRC | SRC | SINK | SINK | NO | PAGE | SINK |
| SINKS | SRC | READ | WRITE | READ | WRITE | DMA | SIZE | MEM |
+-------+------+------+-------+------+-------+-----+------+-----*/
-MTM(INVALID , F , F , F , F , F , F , F , F , F ),
-MTM(NORMAL , T , T , T , T , T , T , T , T , T ),
-MTM(TO_SINK , F , F , F , T , T , T , F , F , F ),
-MTM(TO_SOURCE, F , F , T , F , F , T , F , F , F ),
-MTM(PINNED , T , T , T , T , T , T , F , F , F ),
-MTM(OPENCL , T , T , T , T , T , T , T , T , F ),
+MTM(INVALID , F , F , F , F , F , F , F , F , F ),
+MTM(NORMAL , T , T , T , T , T , T , T , T , T ),
+MTM(RESERVED1 , F , F , F , F , F , F , F , F , F ),
+MTM(RESERVED2 , F , F , F , F , F , F , F , F , F ),
+MTM(PINNED , T , T , T , T , T , T , F , F , F ),
+MTM(OPENCL , T , T , T , T , T , T , T , T , F ),
};
///\endcode
#undef MTM
@@ -223,8 +221,8 @@ COI_VALID_BUFFER_TYPES_AND_MAP
+-------+-------+-------+*/
MMM(INVALID , F , F , F ),
MMM(NORMAL , T , T , T ),
-MMM(STREAMING_TO_SINK , F , F , T ),
-MMM(STREAMING_TO_SOURCE , F , T , F ),
+MMM(RESERVED1 , F , F , F ),
+MMM(RESERVED2 , F , F , F ),
MMM(PINNED , T , T , T ),
MMM(OPENCL , T , T , T ),
};
@@ -250,7 +248,19 @@ typedef enum COI_COPY_TYPE
/// The runtime should use a CPU copy to copy the data.
/// CPU copy is a synchronous copy. So the resulting operations are always
/// blocking (even though a out_pCompletion event is specified).
- COI_COPY_USE_CPU
+ COI_COPY_USE_CPU,
+
+ /// Same as COI_COPY_UNSPECIFIED, but forces moving entire buffer to target process in Ex
+ /// extended APIs, even if the full buffer is not written.
+ COI_COPY_UNSPECIFIED_MOVE_ENTIRE,
+
+ /// Same as COI_COPY_USE_DMA, but forces moving entire buffer to target process in Ex
+ /// extended APIs, even if the full buffer is not written.
+ COI_COPY_USE_DMA_MOVE_ENTIRE,
+
+ /// Same as COI_COPY_USE_CPU, but forces moving entire buffer to target process in Ex
+ /// extended APIs, even if the full buffer is not written.
+ COI_COPY_USE_CPU_MOVE_ENTIRE
} COI_COPY_TYPE;
@@ -260,9 +270,7 @@ typedef enum COI_COPY_TYPE
/// access in a COIPROCESS. This is used with COIBufferSetState.
///
/// Buffer state holds only for NORMAL Buffers and OPENCL buffers. Pinned
-/// buffers are always valid everywhere they get created. Streaming buffers
-/// do not follow the state transition rules, as a new version of the
-/// buffer is created every time it is Mapped or you issue a RunFunction.
+/// buffers are always valid everywhere they get created.
///
/// Rules on State Transition of the buffer:
/// -. When a Buffer is created by default it is valid only on the source,
@@ -296,11 +304,11 @@ typedef enum COI_COPY_TYPE
///
/// - COIBufferWrite makes the buffer exclusively valid where the write
/// happens. Write gives preference to Source over Sink. In other words
-/// if a buffer is valid on the Source and multiple Sinks, Write will
-/// happen on the Source and will Invalidate all other Sinks. If the
-/// buffer is valid on multiple Sinks ( and not on the Source) then
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
-/// selects process handle with the lowest numerical value to do the
+/// if a buffer is valid on the Source and multiple Sinks, Write will
+/// happen on the Source and will Invalidate all other Sinks. If the
+/// buffer is valid on multiple Sinks ( and not on the Source) then
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
+/// selects process handle with the lowest numerical value to do the
/// exclusive write Again, OPENCL buffers are invalidated only if the
/// buffer is not in use on that SINK/SOURCE.
///
@@ -308,41 +316,41 @@ typedef enum COI_COPY_TYPE
/// when data needs to be moved from a valid location. The selection of
/// valid location happens as stated above.
///
-/// - It is possible to alter only parts of the buffer and change it state
-/// In other words it is possible for different parts of the buffer to have
-/// different states on different devices. A byte is the minimum size at
+/// - It is possible to alter only parts of the buffer and change its state.
+/// In other words it is possible for different parts of the buffer to have
+/// different states on different devices. A byte is the minimum size at
/// which state can be maintained internally. Granularity level is completely
/// determined by how the buffer gets fragmented.
///
-/// Note: Buffer is considered 'in use' if is
+/// Note: Buffer is considered 'in use' if it is
/// - Being used in RunFunction : In use on a Sink
/// - Mapped: In use on a Source
/// - AddRef'd: In use on Sink
///
//////////////////////////////////////////////////////////////////////////////
-/// The buffer states used with COIBufferSetState call to indicate the new
+/// The buffer states used with COIBufferSetState call to indicate the new
/// state of the buffer on a given process
///
typedef enum {
COI_BUFFER_VALID = 0, // Buffer is valid and up-to-date on the process
COI_BUFFER_INVALID , // Buffer is not valid, need valid data
COI_BUFFER_VALID_MAY_DROP, // Same as valid but will drop the content when
- // evicted to avoid overwriting the shadow
+ // evicted to avoid overwriting the shadow
// memory
COI_BUFFER_RESERVED // Reserved for internal use
} COI_BUFFER_STATE;
///
/// Note: A VALID_MAY_DROP declares a buffer's copy as secondary on a given
/// process. This means that there needs to be at least one primary copy of the
-/// the buffer somewhere in order to mark the buffer as VALID_MAY_DROP on a
+/// buffer somewhere in order to mark the buffer as VALID_MAY_DROP on a
/// process. In other words to make a buffer VALID_MAY_DROP on a given process
/// it needs to be in COI_BUFFER_VALID state somewhere else. The operation gets
/// ignored (or is a nop) if there is no primary copy of the buffer. The nature
-/// of this state to "drop the content" when evicted is a side effect of
-/// marking the buffer as secondary copy. So when a buffer marked
-/// VALID_MAY_DROP is evicted Intel® Coprocessor Offload Infrastructure
-/// (Intel® COI) doesn't back it up as it is assumed that
+/// of this state to "drop the content" when evicted is a side effect of
+/// marking the buffer as secondary copy. So when a buffer marked
+/// VALID_MAY_DROP is evicted Intel(R) Coprocessor Offload Infrastructure
+/// (Intel(R) COI) doesn't back it up as it is assumed that
/// there is a primary copy somewhere.
//////////////////////////////////////////////////////////////////////////////
@@ -355,19 +363,37 @@ typedef enum {
// A process handle for COIBufferSetState call to indicate all the sink
// processes where the given buffer is valid
-#define COI_SINK_OWNERS ((COIPROCESS)-2)
+#define COI_SINK_OWNERS ((COIPROCESS)-2)
+
+// Matrix descriptors used with MultiD Read/Write
+typedef struct dim_desc {
+ int64_t size; // Size of data type
+ int64_t lindex; // Lower index, used in Fortran
+ int64_t lower; // Lower section bound
+ int64_t upper; // Upper section bound
+ int64_t stride; // Stride, or number of bytes between the start
+ // of one element and start of next one divided
+ // by size.
+} dim_desc;
+
+typedef struct arr_desc {
+ int64_t base; // Base address
+ int64_t rank; // Rank of array, i.e. number of dimensions
+ dim_desc dim[3]; // This array has as many elements as "rank",
+ // currently limited to 3.
+} arr_desc;
//////////////////////////////////////////////////////////////////////////////
///
/// Creates a buffer that can be used in RunFunctions that are queued in
/// pipelines. The address space for the buffer is reserved when it is
/// created although the memory may not be committed until the buffer is
-/// used for the first time. Please note that the Intel® Coprocessor Offload
-/// Infrastructure (Intel® COI) runtime may also
-/// allocate space for the source process to use as shadow memory for
-/// certain types of buffers. If Intel® Coprocessor Offload Infrastructure
-/// (Intel® COI) does allocate this memory it will not
-/// be released or reallocated until the COIBuffer is destroyed.
+/// used for the first time. Please note that the Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) runtime may also allocate space for the
+/// source process to use as shadow memory for certain types of buffers.
+/// If Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
+/// does allocate this memory it will not be released or reallocated
+/// until the COIBuffer is destroyed.
///
/// @param in_Size
/// [in] The number of bytes to allocate for the buffer. If in_Size
@@ -408,13 +434,13 @@ typedef enum {
/// which flags and types are compatible.
///
/// @return COI_OUT_OF_RANGE if in_Size is zero, if the bits set in
-/// the in_Flags parameter are not recognized flags, or if
-/// in_NumProcesses is zero.
+/// the in_Flags parameter are not recognized flags, or if in_NumProcesses is zero.
///
/// @return COI_INVALID_POINTER if the in_pProcesses or out_pBuffer parameter
/// is NULL.
///
-/// @return COI_NOT_SUPPORTED if one of the in_Flags is COI_SINK_MEMORY.
+/// @return COI_NOT_SUPPORTED if in_Type has invalid value or if
+/// one of the in_Flags is COI_SINK_MEMORY.
///
/// @return COI_NOT_SUPPORTED if the flags include either
/// COI_SAME_ADDRESS_SINKS or COI_SAME_ADDRESS_SINKS_AND_SOURCE and
@@ -425,7 +451,10 @@ typedef enum {
///
/// @return COI_OUT_OF_MEMORY if allocating the buffer fails.
///
-/// @return COI_RESOURCE_EXHAUSTED if the sink is out of buffer memory.
+/// @return COI_RESOURCE_EXHAUSTED if the sink is out of buffer memory. This
+/// error can also be thrown from Windows 7 operating systems if
+/// COI_BUFFER_PINNED and a size larger than 4KB is requested.
+/// This is due to a limitation of the Windows 7 memory management unit.
///
COIACCESSAPI
COIRESULT
@@ -442,22 +471,22 @@ COIBufferCreate(
///
/// Creates a buffer from some existing memory that can be used in
/// RunFunctions that are queued in pipelines. If the flag COI_SINK_MEMORY
-/// is specified then Intel® Coprocessor Offload I
-/// nfrastructure (Intel® COI) will use that memory for the buffer on the sink.
-/// If that flag isn't set then the memory provided is used as backing store
+/// is specified then Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) will use that memory for the buffer on the sink.
+/// If that flag isn't set then the memory provided is used as backing store
/// for the buffer on the source. In either case the memory must not be freed
/// before the buffer is destroyed.
-/// While the user still owns the memory passed in they must use the
+/// While the user still owns the memory passed in they must use the
/// appropriate access flags when accessing the buffer in COIPipelinRunFunction
/// or COIBufferMap calls so that the runtime knows when the
/// memory has been modified. If the user just writes directly to the memory
-/// location then those changes may not be visible when the corresponding
+/// location then those changes may not be visible when the corresponding
/// buffer is accessed.
/// Whatever values are already present in the memory location when this call
/// is made are preserved. The memory values are also preserved when
/// COIBufferDestroy is called.
///
-/// @warning: Use of this function is highly discouraged if the calling program
+/// @warning: Use of this function is highly discouraged if the calling
/// program forks at all (including calls to system(3), popen(3), or similar
/// functions) during the life of this buffer. See the discussion around the
/// in_Memory parameter below regarding this.
@@ -467,8 +496,7 @@ COIBufferCreate(
/// is not page aligned, it will be rounded up.
///
/// @param in_Type
-/// [in] The type of the buffer to create. Note that streaming buffers
-/// can not be created from user memory. Only COI_BUFFER_NORMAL and
+/// [in] The type of the buffer to create. Only COI_BUFFER_NORMAL and
/// COI_BUFFER_PINNED buffer types are supported.
///
/// @param in_Flags
@@ -496,7 +524,7 @@ COIBufferCreate(
/// system(3), popen(3), among others).
///
/// For forked processes, Linux uses copy-on-write semantics for
-/// performances reasons. Conseqeuently, if the parent forks and then
+/// performance reasons. Consequently, if the parent forks and then
/// writes to this memory, the physical page mapping changes causing
/// the DMA to fail (and thus data corruption).
///
@@ -522,8 +550,8 @@ COIBufferCreate(
///
/// @return COI_SUCCESS if the buffer was created
///
-/// @return COI_NOT_SUPPORTED if the in_Type value is not COI_BUFFER_NORMAL or
-/// COI_BUFFER_PINNED.
+/// @return COI_NOT_SUPPORTED if the in_Type value is not COI_BUFFER_NORMAL,
+/// COI_BUFFER_PINNED, or COI_BUFFER_OPENCL.
///
/// @return COI_NOT_SUPPORTED if in_Memory is read-only memory
///
@@ -547,8 +575,7 @@ COIBufferCreate(
/// COI_OPTIMIZE_HUGE_PAGE_SIZE are both set.
///
/// @return COI_OUT_OF_RANGE if in_Size is zero, if the bits set in
-/// the in_Flags parameter are not recognized flags, or if
-/// in_NumProcesses is zero.
+/// the in_Flags parameter are not recognized flags, or if in_NumProcesses is zero.
///
/// @return COI_INVALID_POINTER if in_Memory, in_pProcesses or
/// out_pBuffer parameter is NULL.
@@ -560,7 +587,7 @@ COIACCESSAPI
COIRESULT
COIBufferCreateFromMemory(
uint64_t in_Size,
- COI_BUFFER_TYPE in_Type,
+ COI_BUFFER_TYPE in_Type,
uint32_t in_Flags,
void* in_Memory,
uint32_t in_NumProcesses,
@@ -570,10 +597,10 @@ COIBufferCreateFromMemory(
//////////////////////////////////////////////////////////////////////////////
///
-/// Destroys a buffer. Will block on completion of any operations on the
-/// buffer, such as COIPipelineRunFunction or COIBufferCopy. Will block until
+/// Destroys a buffer. Will block on completion of any operations on the
+/// buffer, such as COIPipelineRunFunction or COIBufferCopy. Will block until
/// all COIBufferAddRef calls have had a matching COIBufferReleaseRef call
-/// made. Will not block on an outstanding COIBufferUnmap but will instead
+/// made. Will not block on an outstanding COIBufferUnmap but will instead
/// return COI_RETRY.
///
/// @param in_Buffer
@@ -599,32 +626,30 @@ COIBufferDestroy(
///
/// This call initiates a request to access a region of a buffer. Multiple
/// overlapping (or non overlapping) regions can be mapped simultaneously for
-/// any given buffer. If a completion event is specified this call will
+/// any given buffer. If a completion event is specified this call will
/// queue a request for the data which will be satisfied when the buffer is
-/// available. Once all conditions are met the completion event will be
-/// signaled and the user can access the data at out_ppData. The user can call
+/// available. Once all conditions are met the completion event will be
+/// signaled and the user can access the data at out_ppData. The user can call
/// COIEventWait with out_pCompletion to find out when the map operation has
/// completed. If the user accesses the data before the map operation is
-/// complete the results are undefined. If out_pCompletion is NULL then this
+/// complete the results are undefined. If out_pCompletion is NULL then this
/// call blocks until the map operation completes and when this call returns
-/// out_ppData can be safely accessed. This call returns a map instance handle
+/// out_ppData can be safely accessed. This call returns a map instance handle
/// in an out parameter which must be passed into COIBufferUnmap when the user
/// no longer needs access to that region of the buffer.
///
/// The address returned from COIBufferMap may point to memory that
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// manages on behalf of the user. The user must not free or reallocate this
-/// memory, Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// memory, Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// will perform any necessary cleanup when the buffer is
/// destroyed.
///
/// Note that different types of buffers behave differently when mapped.
/// For instance, mapping a COI_BUFFER_NORMAL for write must stall if the
-/// buffer is currently being written to by a run function. Mapping a
-/// COI_BUFFER_STREAMING_TO_SINK will create a new physical copy of the buffer
-/// and make it available immediately. Mapping a COI_BUFFER_PINNED buffer will
-/// not affect other functions that use that buffer since a COI_BUFFER_PINNED
-/// buffer can be mapped at any time.
+/// buffer is currently being written to by a run function. Mapping
+/// a COI_BUFFER_PINNED buffer will not affect other functions that use
+/// that buffer since a COI_BUFFER_PINNED buffer can be mapped at any time.
/// The asynchronous operation of COIBufferMap will likely be most useful when
/// paired with a COI_BUFFER_NORMAL.
///
@@ -633,15 +658,15 @@ COIBufferDestroy(
///
/// @param in_Offset
/// [in] Offset into the buffer that a pointer should be returned
-/// for. The value 0 can be passed in to signify that the mapped
+/// for. The value 0 can be passed in to signify that the mapped
/// region should start at the beginning of the buffer.
///
/// @param in_Length
/// [in] Length of the buffer area to map. This parameter, in
/// combination with in_Offset, allows the caller to specify
-/// that only a subset of an entire buffer need be mapped. A
-/// value of 0 can be passed in only if in_Offset is 0, to signify
-/// that the mapped region is the entire buffer.
+/// that only a subset of an entire buffer need be mapped. A
+/// value of 0 can be passed in only if in_Offset is 0, to signify
+/// that the mapped region is the entire buffer.
///
/// @param in_Type
/// [in] The access type that is needed by the application. This will
@@ -700,11 +725,6 @@ COIBufferDestroy(
/// @return COI_ARGUMENT_MISMATCH if the in_Type of map is not a valid type
/// for in_Buffer's type of buffer.
///
-/// @return COI_RESOURCE_EXHAUSTED if could not create a version for TO_SINK
-/// streaming buffer. It can fail if enough memory is not available to
-/// register. This call will succeed eventually when the registered
-/// memory becomes available.
-///
/// @return COI_INVALID_HANDLE if in_Buffer is not a valid buffer handle.
///
/// @return COI_INVALID_POINTER if out_pMapInstance or out_ppData is NULL.
@@ -725,9 +745,9 @@ COIBufferMap(
//////////////////////////////////////////////////////////////////////////////
///
/// Disables Source access to the region of the buffer that was provided
-/// through the corresponding call to COIBufferMap. The number of calls to
+/// through the corresponding call to COIBufferMap. The number of calls to
/// COIBufferUnmap() should always match the number of calls made to
-/// COIBufferMap(). The data pointer returned from the COIBufferMap() call
+/// COIBufferMap(). The data pointer returned from the COIBufferMap() call
/// will be invalid after this call.
///
/// @param in_MapInstance
@@ -750,7 +770,7 @@ COIBufferMap(
///
/// @param out_pCompletion
/// [out] An optional pointer to a COIEVENT object that will be
-/// signaled when the unmap is complete. The user may pass in NULL if
+/// signaled when the unmap is complete. The user may pass in NULL if
/// the user wants COIBufferUnmap to perform a blocking unmap
/// operation.
///
@@ -774,11 +794,12 @@ COIBufferUnmap(
//////////////////////////////////////////////////////////////////////////////
///
-/// Gets the Sink's virtual address of the buffer. This is the same
-/// address that is passed to the run function on the Sink. The virtual
+/// Gets the Sink's virtual address of the buffer for the first process
+/// that is using the buffer. This is the same address
+/// that is passed to the run function on the Sink. The virtual
/// address assigned to the buffer for use on the sink is fixed;
/// the buffer will always be present at that virtual address on the sink
-/// and will not get a different virtual address across different
+/// and will not get a different virtual address across different
/// RunFunctions.
/// This address is only valid on the Sink and should not be dereferenced on
/// the Source (except for the special case of buffers created with the
@@ -796,9 +817,6 @@ COIBufferUnmap(
///
/// @return COI_INVALID_POINTER if the out_pAddress parameter was invalid.
///
-/// @return COI_NOT_SUPPORTED if the buffer passed in is of type
-/// COI_BUFFER_STREAMING_TO_SOURCE or COI_BUFFER_STREAMING_TO_SINK.
-///
COIACCESSAPI
COIRESULT
COIBufferGetSinkAddress(
@@ -807,9 +825,47 @@ COIBufferGetSinkAddress(
//////////////////////////////////////////////////////////////////////////////
///
+/// Gets the Sink's virtual address of the buffer. This is the same
+/// address that is passed to the run function on the Sink. The virtual
+/// address assigned to the buffer for use on the sink is fixed;
+/// the buffer will always be present at that virtual address on the sink
+/// and will not get a different virtual address across different
+/// RunFunctions.
+/// This address is only valid on the Sink and should not be dereferenced on
+/// the Source (except for the special case of buffers created with the
+/// COI_SAME_ADDRESS flag).
+///
+/// @param in_Process
+/// [in] The process for which the address should be returned.
+/// Special handle value 0 can be passed to the function;
+/// in this case, the address for the first valid process will be returned.
+///
+/// @param in_Buffer
+/// [in] Buffer handle
+///
+/// @param out_pAddress
+/// [out] Pointer to a uint64_t that will be filled with the address.
+///
+/// @return COI_SUCCESS upon successful return of the buffer's address.
+///
+/// @return COI_INVALID_HANDLE if the passed in buffer or process
+/// handle was invalid.
+///
+/// @return COI_INVALID_POINTER if the out_pAddress parameter was invalid.
+///
+/// @return COI_OUT_OF_RANGE if the in_Process is not valid for in_Buffer at the
+/// moment of calling the function.
+///
+COIACCESSAPI
+COIRESULT
+COIBufferGetSinkAddressEx(
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t* out_pAddress);
+
+//////////////////////////////////////////////////////////////////////////////
+///
/// Copy data from a normal virtual address into an existing COIBUFFER.
-/// Note that it is not possible to use this API with any type of
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) Streaming Buffers.
/// Please note that COIBufferWrite does not follow implicit buffer
/// dependencies. If a buffer is in use in a run function or has been added
/// to a process using COIBufferAddRef the call to COIBufferWrite will not
@@ -817,19 +873,22 @@ COIBufferGetSinkAddress(
/// This is to facilitate a usage model where a buffer is being used outside
/// of a run function, for example in a spawned thread, but data still needs
/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
///
/// @param in_DestBuffer
/// [in] Buffer to write into.
///
-#ifdef COI_PROTOTYPE_TARGET_PROCESS
/// @param in_DestProcess
-/// [in] A pointer to the processes which are used as hints
-/// to to COI. Buffers are updated upon these processes first.
+/// [in] A pointer to the process to which the data will be written.
+/// Buffer is updated only in this process and invalidated in other
+/// processes. Only a single process can be specified.
/// Can be left NULL and default behavior will be chosen, which
-/// chooses the lowest SCIF node with an active regions first. Others
-/// buffer regions are invalidated in both cases. Will only update a single
-/// process at this time.
-#endif
+/// chooses the first valid process in which regions are found. Other
+/// buffer regions are invalidated if not updated.
///
/// @param in_Offset
/// [in] Location in the buffer to start writing to.
@@ -869,7 +928,7 @@ COIBufferGetSinkAddress(
/// synchronous and will block until the transfer is complete.
///
///
-/// @return COI_SUCCESS if the buffer was copied successfully.
+/// @return COI_SUCCESS if the buffer was written successfully.
///
/// @return COI_INVALID_HANDLE if the buffer handle was invalid.
///
@@ -881,9 +940,6 @@ COIBufferGetSinkAddress(
/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
/// in_NumDependencies is not 0.
///
-/// @return COI_NOT_SUPPORTED if the source buffer is of type
-/// COI_BUFFER_STREAMING_TO_SINK or COI_BUFFER_STREAMING_TO_SOURCE.
-///
/// @return COI_INVALID_POINTER if the in_pSourceData pointer is NULL.
///
/// @return COI_OUT_OF_RANGE if in_Offset + in_Length exceeds the size of
@@ -894,10 +950,9 @@ COIBufferGetSinkAddress(
/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
/// buffer or COI_BUFFER_OPENCL buffer.
///
-#ifdef COI_PROTOTYPE_TARGET_PROCESS
COIACCESSAPI
COIRESULT
-COIBufferWrite(
+COIBufferWriteEx(
COIBUFFER in_DestBuffer,
const COIPROCESS in_DestProcess,
uint64_t in_Offset,
@@ -907,8 +962,299 @@ COIBufferWrite(
uint32_t in_NumDependencies,
const COIEVENT* in_pDependencies,
COIEVENT* out_pCompletion);
-__asm__(".symver COIBufferWrite,COIBufferWrite@COI_2.0");
-#else
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Copy data specified by multi-dimensional array data structure into another
+/// multi-dimensional array in an existing COIBUFFER.
+/// Arrays with more than 3 dimensions are not supported.
+/// Different numbers of elements between src and destination are not supported.
+/// Please note that COIBufferWriteMultiD does not follow implicit buffer
+/// dependencies. If a buffer is in use in a run function or has been added
+/// to a process using COIBufferAddRef the call to COIBufferWriteMultiD will not
+/// wait, it will still copy data immediately.
+/// This is to facilitate a usage model where a buffer is being used outside
+/// of a run function, for example in a spawned thread, but data still needs
+/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
+///
+///
+/// @param in_DestBuffer
+/// [in] Buffer to write into.
+///
+/// @param in_DestProcess
+/// [in] A pointer to the process to which the data will be written.
+/// Buffer is updated only in this process and invalidated in other
+/// processes. Only a single process can be specified.
+/// Can be left NULL and default behavior will be chosen, which
+/// chooses the first valid process in which regions are found. Other
+/// buffer regions are invalidated if not updated.
+///
+/// @param in_Offset
+/// [in] Start location of the destination array within the buffer.
+///
+/// @param in_DestArray
+/// [in] A pointer to a data structure describing the structure of
+/// the data array in the buffer. Total size must not be larger than
+/// the size of in_DestBuffer. The base field of this structure will
+/// be ignored.
+///
+/// @param in_SrcArray
+/// [in] A pointer to a data structure describing the structure of
+/// the data array in local memory that should be copied. in_SrcArray
+/// and in_DestArray must have the same number of elements. The base
+/// field of this structure should be the virtual pointer to the local
+/// memory in which this array is located.
+///
+/// @param in_Type
+/// [in] The type of copy operation to use, one of either
+/// COI_COPY_UNSPECIFIED, COI_COPY_USE_DMA, COI_COPY_USE_CPU.
+///
+/// @param in_NumDependencies
+/// [in] The number of dependencies specified in the in_pDependencies
+/// array. This may be 0 if the caller does not want the write call to
+/// wait for any additional events to be signaled before starting the
+/// write operation.
+///
+/// @param in_pDependencies
+/// [in] An optional array of handles to previously created COIEVENT
+/// objects that this write operation will wait for before starting.
+/// This allows the user to create dependencies between buffer write
+/// calls and other operations such as run functions and map calls. The
+/// user may pass in NULL if they do not wish to wait for any
+/// additional dependencies to complete before doing the write.
+///
+/// @param out_pCompletion
+/// [out] An optional event to be signaled when the write has
+/// completed. This event can be used as a dependency to order
+/// the write with regard to future operations.
+/// If no completion event is passed in then the write is
+/// synchronous and will block until the transfer is complete.
+///
+///
+/// @return COI_SUCCESS if the buffer was copied successfully.
+///
+/// @return COI_INVALID_HANDLE if the buffer or process handle was invalid.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset is beyond the end of the buffer.
+///
+/// @return COI_ARGUMENT_MISMATCH if the in_pDependencies is non NULL but
+/// in_NumDependencies is 0.
+///
+/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
+/// in_NumDependencies is not 0.
+///
+/// @return COI_NOT_SUPPORTED if the number of dimensions of the destination
+/// or source array is greater than 3 or less than 1.
+///
+/// @return COI_INVALID_POINTER if the pointer in_SrcArray->base is NULL.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset + size of in_DestArray exceeds the
+/// size of the buffer.
+///
+/// @return COI_OUT_OF_MEMORY if any allocation of memory fails
+///
+/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
+/// buffer or COI_BUFFER_OPENCL buffer.
+///
+COIACCESSAPI
+COIRESULT
+COIBufferWriteMultiD(
+ COIBUFFER in_DestBuffer,
+ const COIPROCESS in_DestProcess,
+ uint64_t in_Offset,
+ struct arr_desc* in_DestArray,
+ struct arr_desc* in_SrcArray,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT* in_pDependencies,
+ COIEVENT* out_pCompletion);
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Copy data specified by multi-dimensional array data structure from an
+/// existing COIBUFFER to another multi-dimensional array located in memory.
+/// Arrays with more than 3 dimensions are not supported.
+/// Different numbers of elements between source and destination are not supported.
+/// Please note that COIBufferReadMultiD does not follow implicit buffer
+/// dependencies. If a buffer is in use in a run function or has been added
+/// to a process using COIBufferAddRef the call to COIBufferReadMultiD will not
+/// wait, it will still copy data immediately.
+/// This is to facilitate a usage model where a buffer is being used outside
+/// of a run function, for example in a spawned thread, but data still needs
+/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
+///
+///
+/// @param in_SourceBuffer
+/// [in] Buffer to read from.
+///
+/// @param in_Offset
+/// [in] Start location of the source array within the buffer.
+///
+/// @param in_DestArray
+/// [in] A pointer to a data structure describing the structure of
+/// the data array in local memory that the data is copied into.
+/// in_DestArray and in_SrcArray must have the same number of elements.
+/// The base field of this structure should be the virtual pointer to
+/// the local memory in which this array is located.
+///
+/// @param in_SrcArray
+/// [in] A pointer to a data structure describing the structure of
+/// the data array in the buffer. Total size must not be larger than
+/// the size of in_SourceBuffer. The base field of this structure will
+/// be ignored.
+///
+/// @param in_Type
+/// [in] The type of copy operation to use, one of either
+/// COI_COPY_UNSPECIFIED, COI_COPY_USE_DMA, COI_COPY_USE_CPU.
+///
+/// @param in_NumDependencies
+/// [in] The number of dependencies specified in the in_pDependencies
+/// array. This may be 0 if the caller does not want the read call to
+/// wait for any additional events to be signaled before starting the
+/// read operation.
+///
+/// @param in_pDependencies
+/// [in] An optional array of handles to previously created COIEVENT
+/// objects that this read operation will wait for before starting.
+/// This allows the user to create dependencies between buffer read
+/// calls and other operations such as run functions and map calls. The
+/// user may pass in NULL if they do not wish to wait for any
+/// additional dependencies to complete before doing the read.
+///
+/// @param out_pCompletion
+/// [out] An optional event to be signaled when the read has
+/// completed. This event can be used as a dependency to order
+/// the read with regard to future operations.
+/// If no completion event is passed in then the read is
+/// synchronous and will block until the transfer is complete.
+///
+///
+/// @return COI_SUCCESS if the buffer was read successfully.
+///
+/// @return COI_INVALID_HANDLE if the buffer or process handle was invalid.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset is beyond the end of the buffer.
+///
+/// @return COI_ARGUMENT_MISMATCH if the in_pDependencies is non NULL but
+/// in_NumDependencies is 0.
+///
+/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
+/// in_NumDependencies is not 0.
+///
+/// @return COI_NOT_SUPPORTED if the number of dimensions of the destination
+/// or source array is greater than 3 or less than 1.
+///
+/// @return COI_INVALID_POINTER if the pointer in_DestArray->base is NULL.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset + size of in_SourceArray exceeds the
+/// size of the buffer.
+///
+/// @return COI_OUT_OF_MEMORY if any allocation of memory fails
+///
+/// @return COI_RETRY if in_SourceBuffer is mapped and is not a COI_BUFFER_PINNED
+/// buffer or COI_BUFFER_OPENCL buffer.
+///
+COIACCESSAPI
+COIRESULT
+COIBufferReadMultiD(
+ COIBUFFER in_SourceBuffer,
+ uint64_t in_Offset,
+ struct arr_desc* in_DestArray,
+ struct arr_desc* in_SrcArray,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT* in_pDependencies,
+ COIEVENT* out_pCompletion);
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Copy data from a normal virtual address into an existing COIBUFFER.
+/// Please note that COIBufferWrite does not follow implicit buffer
+/// dependencies. If a buffer is in use in a run function or has been added
+/// to a process using COIBufferAddRef the call to COIBufferWrite will not
+/// wait, it will still copy data immediately.
+/// This is to facilitate a usage model where a buffer is being used outside
+/// of a run function, for example in a spawned thread, but data still needs
+/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
+///
+/// @param in_DestBuffer
+/// [in] Buffer to write into.
+///
+/// @param in_Offset
+/// [in] Location in the buffer to start writing to.
+///
+/// @param in_pSourceData
+/// [in] A pointer to local memory that should be copied into the
+/// provided buffer.
+///
+/// @param in_Length
+/// [in] The number of bytes to write from in_pSourceData into
+/// in_DestBuffer. Must not be larger than the size of in_DestBuffer
+/// and must not over run in_DestBuffer if an in_Offset is provided.
+///
+/// @param in_Type
+/// [in] The type of copy operation to use, one of either
+/// COI_COPY_UNSPECIFIED, COI_COPY_USE_DMA, COI_COPY_USE_CPU.
+///
+/// @param in_NumDependencies
+/// [in] The number of dependencies specified in the in_pDependencies
+/// array. This may be 0 if the caller does not want the write call to
+/// wait for any additional events to be signaled before starting the
+/// write operation.
+///
+/// @param in_pDependencies
+/// [in] An optional array of handles to previously created COIEVENT
+/// objects that this write operation will wait for before starting.
+/// This allows the user to create dependencies between buffer write
+/// calls and other operations such as run functions and map calls. The
+/// user may pass in NULL if they do not wish to wait for any
+/// additional dependencies to complete before doing the write.
+///
+/// @param out_pCompletion
+/// [out] An optional event to be signaled when the write has
+/// completed. This event can be used as a dependency to order
+/// the write with regard to future operations.
+/// If no completion event is passed in then the write is
+/// synchronous and will block until the transfer is complete.
+///
+///
+/// @return COI_SUCCESS if the buffer was copied successfully.
+///
+/// @return COI_INVALID_HANDLE if the buffer handle was invalid.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset is beyond the end of the buffer.
+///
+/// @return COI_ARGUMENT_MISMATCH if the in_pDependencies is non NULL but
+/// in_NumDependencies is 0.
+///
+/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
+/// in_NumDependencies is not 0.
+///
+/// @return COI_INVALID_POINTER if the in_pSourceData pointer is NULL.
+///
+/// @return COI_OUT_OF_RANGE if in_Offset + in_Length exceeds the size of
+/// the buffer.
+///
+/// @return COI_OUT_OF_RANGE if in_Length is 0.
+///
+/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
+/// buffer or COI_BUFFER_OPENCL buffer.
+///
COIACCESSAPI
COIRESULT
COIBufferWrite(
@@ -920,14 +1266,10 @@ COIBufferWrite(
uint32_t in_NumDependencies,
const COIEVENT* in_pDependencies,
COIEVENT* out_pCompletion);
-__asm__(".symver COIBufferWrite,COIBufferWrite@COI_1.0");
-#endif
//////////////////////////////////////////////////////////////////////////////
///
/// Copy data from a buffer into local memory.
-/// Note that it is not possible to use this API with any type of
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) Streaming Buffers.
/// Please note that COIBufferRead does not follow implicit buffer
/// dependencies. If a buffer is in use in a run function or has been added
/// to a process using COIBufferAddRef the call to COIBufferRead will not
@@ -935,6 +1277,11 @@ __asm__(".symver COIBufferWrite,COIBufferWrite@COI_1.0");
/// This is to facilitate a usage model where a buffer is being used outside
/// of a run function, for example in a spawned thread, but data still needs
/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
///
///
/// @param in_SourceBuffer
@@ -989,9 +1336,6 @@ __asm__(".symver COIBufferWrite,COIBufferWrite@COI_1.0");
/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
/// in_NumDependencies is not 0.
///
-/// @return COI_NOT_SUPPORTED if the source buffer is of type
-/// COI_BUFFER_STREAMING_TO_SINK or COI_BUFFER_STREAMING_TO_SOURCE.
-///
/// @return COI_OUT_OF_RANGE if in_Offset + in_Length exceeds the size of
/// the buffer.
///
@@ -1019,8 +1363,6 @@ COIBufferRead(
/// Copy data between two buffers. It also allows copying within the same
/// buffer. For copy within the same buffer, if source and destination regions
/// overlap then this API returns error.
-/// Note that it is not possible to use this API with any type of
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) Streaming Buffers.
/// Please note that COIBufferCopy does not follow implicit buffer
/// dependencies. If a buffer is in use in a run function or has been added
/// to a process using COIBufferAddRef the call to COIBufferCopy will not
@@ -1028,18 +1370,22 @@ COIBufferRead(
/// This is to facilitate a usage model where a buffer is being used outside
/// of a run function, for example in a spawned thread, but data still needs
/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
///
/// @param in_DestBuffer
/// [in] Buffer to copy into.
-#ifdef COI_PROTOTYPE_TARGET_PROCESS
+///
/// @param in_DestProcess
-/// [in] A pointer to the processes which are used as hints
-/// to to COI. Buffers are updated upon these processes first.
+/// [in] A pointer to the process to which the data will be written.
+/// Buffer is updated only in this process and invalidated in other
+/// processes. Only a single process can be specified.
/// Can be left NULL and default behavior will be chosen, which
-/// chooses the lowest SCIF node with an active regions first. Others
-/// buffer regions are invalidated in both cases. Will only update a single
-/// process at this time.
-#endif
+/// chooses the first valid process in which regions are found. Other
+/// buffer regions are invalidated if not updated.
///
/// @param in_SourceBuffer
/// [in] Buffer to copy from.
@@ -1089,7 +1435,7 @@ COIBufferRead(
/// @return COI_INVALID_HANDLE if either buffer handle was invalid.
///
/// @return COI_MEMORY_OVERLAP if in_SourceBuffer and in_DestBuffer are the
-/// same buffer(or have the same parent buffer) and the source and
+/// same buffer(or have the same parent buffer) and the source and
/// destination regions overlap
///
/// @return COI_OUT_OF_RANGE if in_DestOffset is is beyond the end of
@@ -1110,19 +1456,12 @@ COIBufferRead(
/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
/// in_NumDependencies is not 0.
///
-/// @return COI_NOT_SUPPORTED if the source or destination buffers are of type
-/// COI_BUFFER_STREAMING_TO_SINK or COI_BUFFER_STREAMING_TO_SOURCE.
-///
-/// @return COI_NOT_SUPPORTED if either buffer is of type
-/// COI_BUFFER_STREAMING_TO_SINK or COI_BUFFER_STREAMING_TO_SOURCE.
-///
/// @return COI_RETRY if in_DestBuffer or in_SourceBuffer are mapped and not
/// COI_BUFFER_PINNED buffers or COI_BUFFER_OPENCL buffers.
///
-#ifdef COI_PROTOTYPE_TARGET_PROCESS
COIACCESSAPI
COIRESULT
-COIBufferCopy(
+COIBufferCopyEx(
COIBUFFER in_DestBuffer,
const COIPROCESS in_DestProcess,
COIBUFFER in_SourceBuffer,
@@ -1133,8 +1472,100 @@ COIBufferCopy(
uint32_t in_NumDependencies,
const COIEVENT* in_pDependencies,
COIEVENT* out_pCompletion);
-__asm__(".symver COIBufferCopy,COIBufferCopy@COI_2.0");
-#else
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Copy data between two buffers. It also allows copying within the same
+/// buffer. For copy within the same buffer, if source and destination regions
+/// overlap then this API returns error.
+/// Please note that COIBufferCopy does not follow implicit buffer
+/// dependencies. If a buffer is in use in a run function or has been added
+/// to a process using COIBufferAddRef the call to COIBufferCopy will not
+/// wait, it will still copy data immediately.
+/// This is to facilitate a usage model where a buffer is being used outside
+/// of a run function, for example in a spawned thread, but data still needs
+/// to be transferred to or from the buffer.
+/// Additionally this means that if more than one DMA channel is enabled,
+/// (See COIProcessConfigureDMA) operations to the same buffer may
+/// happen in parallel if they can be assigned to different DMA hardware.
+/// So it is highly recommended to use explicit event dependencies to
+/// order operations where needed.
+///
+/// @param in_DestBuffer
+/// [in] Buffer to copy into.
+///
+/// @param in_SourceBuffer
+/// [in] Buffer to copy from.
+///
+/// @param in_DestOffset
+/// [in] Location in the destination buffer to start writing to.
+///
+/// @param in_SourceOffset
+/// [in] Location in the source buffer to start reading from.
+///
+/// @param in_Length
+/// [in] The number of bytes to copy from in_SourceBuffer into
+/// in_DestBuffer.
+/// If the length is specified as zero then length to be copied
+/// is entire destination buffer's length.
+/// Must not be larger than the size of in_SourceBuffer or
+/// in_DestBuffer and must not over run in_SourceBuffer or
+/// in_DestBuffer if offsets are specified.
+///
+/// @param in_Type
+/// [in] The type of copy operation to use, one of either
+/// COI_COPY_UNSPECIFIED, COI_COPY_USE_DMA, COI_COPY_USE_CPU.
+///
+/// @param in_NumDependencies
+/// [in] The number of dependencies specified in the in_pDependencies
+/// array. This may be 0 if the caller does not want the copy call to
+/// wait for any additional events to be signaled before starting the
+/// copy operation.
+///
+/// @param in_pDependencies
+/// [in] An optional array of handles to previously created COIEVENT
+/// objects that this copy operation will wait for before starting.
+/// This allows the user to create dependencies between buffer copy
+/// calls and other operations such as run functions and map calls. The
+/// user may pass in NULL if they do not wish to wait for any
+/// additional dependencies to complete before doing the copy.
+///
+/// @param out_pCompletion
+/// [out] An optional event to be signaled when the copy has
+/// completed. This event can be used as a dependency to order
+/// the copy with regard to future operations.
+/// If no completion event is passed in then the copy is
+/// synchronous and will block until the transfer is complete.
+///
+/// @return COI_SUCCESS if the buffer was copied successfully.
+///
+/// @return COI_INVALID_HANDLE if either buffer handle was invalid.
+///
+/// @return COI_MEMORY_OVERLAP if in_SourceBuffer and in_DestBuffer are the
+/// same buffer(or have the same parent buffer) and the source and
+/// destination regions overlap
+///
+/// @return COI_OUT_OF_RANGE if in_DestOffset is beyond the end of
+/// in_DestBuffer
+///
+/// @return COI_OUT_OF_RANGE if in_SourceOffset is beyond the end of
+/// in_SourceBuffer.
+///
+/// @return COI_OUT_OF_RANGE if in_DestOffset + in_Length exceeds the size of
+/// the in_DestBuffer
+///
+/// @return COI_OUT_OF_RANGE if in_SourceOffset + in_Length exceeds
+/// the size of in_SourceBuffer.
+///
+/// @return COI_ARGUMENT_MISMATCH if the in_pDependencies is non NULL but
+/// in_NumDependencies is 0.
+///
+/// @return COI_ARGUMENT_MISMATCH if in_pDependencies is NULL but
+/// in_NumDependencies is not 0.
+///
+/// @return COI_RETRY if in_DestBuffer or in_SourceBuffer are mapped and not
+/// COI_BUFFER_PINNED buffers or COI_BUFFER_OPENCL buffers.
+///
COIACCESSAPI
COIRESULT
COIBufferCopy(
@@ -1147,21 +1578,20 @@ COIBufferCopy(
uint32_t in_NumDependencies,
const COIEVENT* in_pDependencies,
COIEVENT* out_pCompletion);
-__asm__(".symver COIBufferCopy,COIBufferCopy@COI_1.0");
-#endif
+
//////////////////////////////////////////////////////////////////////////////
///
-/// This API allows an experienced Intel® Coprocessor Offload Infrastructure
-/// (Intel® COI) developer to set where a COIBUFFER is
+/// This API allows an experienced Intel(R) Coprocessor Offload Infrastructure
+/// (Intel(R) COI) developer to set where a COIBUFFER is
/// located and when the COIBUFFER's data is moved. This functionality is
/// useful when the developer knows when and where a buffer is going to be
/// accessed. It allows the data movement to happen sooner than if the
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// runtime tried to manage the buffer placement itself. The advantage of
/// this API is that the developer knows much more about their own
/// application's data access patterns and can therefore optimize the data
-/// access to be much more efficient than the Intel® Coprocessor Offload
-/// Infrastructure (Intel® COI) runtime. Using this API may yield better
+/// access to be much more efficient than the Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) runtime. Using this API may yield better
/// memory utilization, lower latency and overall improved workload
/// throughput.
/// This API does respect implicit dependencies for buffer read/write hazards.
@@ -1169,17 +1599,17 @@ __asm__(".symver COIBufferCopy,COIBufferCopy@COI_1.0");
/// requests the buffer be placed in another COIPROCESS then this API will wait
/// for the first access to complete before moving the buffer.
/// This API is not required for program correctness. It is intended solely
-/// for advanced Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// for advanced Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// developers who wish to fine tune their application performance
/// Cases where "a change in state" is an error condition the change just gets
-/// ignored without any error. This is because the SetState can be a
+/// ignored without any error. This is because the SetState can be a
/// nonblocking call and in such cases we can't rely on the state of the buffer
/// at the time of the call. We can do the transition checks only at the time
/// when the actual state change happens (which is something in future).
/// Currently there is no way to report an error from something that happens in
/// future and that is why such state transitions are nop. One example is using
-/// VALID_MAY_DROP with COI_SINK_OWNERS when buffer is not valid at source.
-/// This operation will be a nop if at the time of actual state change the
+/// VALID_MAY_DROP with COI_SINK_OWNERS when buffer is not valid at source.
+/// This operation will be a nop if at the time of actual state change the
/// buffer is not valid at source.
///
/// @param in_Buffer
@@ -1188,7 +1618,7 @@ __asm__(".symver COIBufferCopy,COIBufferCopy@COI_1.0");
/// @param in_Process
/// [in] The process where the state is being modified for this
/// buffer. To modify buffer's state on source process use
-/// COI_PROCESS_SOURCE as process handle. To modify buffer's
+/// COI_PROCESS_SOURCE as process handle. To modify buffer's
/// state on all processes where buffer is valid use COI_SINK_OWNERS
/// as the process handle.
///
@@ -1222,7 +1652,7 @@ __asm__(".symver COIBufferCopy,COIBufferCopy@COI_1.0");
/// [out] An optional event to be signaled when the SetState has
/// completed. This event can be used as a dependency to order
/// the SetState with regard to future operations.
-/// If no completion event is passed in then the is
+/// If no completion event is passed in then the state changing is
/// synchronous and will block until the SetState and dma transfers
/// related to this operation are complete.
///
@@ -1239,10 +1669,9 @@ __asm__(".symver COIBufferCopy,COIBufferCopy@COI_1.0");
/// @return COI_ARGUMENT_MISMATCH if the in_Process is COI_SINK_OWNERS and the
/// COI_BUFFER_MOVE is passed as move flag.
///
-/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
+/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
/// handle that was passed in.
///
-
COIACCESSAPI
COIRESULT
COIBufferSetState(
@@ -1257,9 +1686,9 @@ COIBufferSetState(
//////////////////////////////////////////////////////////////////////////////
///
/// Creates a sub-buffer that is a reference to a portion of an existing
-/// buffer. The returned buffer handle can be used in all API calls that the
+/// buffer. The returned buffer handle can be used in all API calls that the
/// original buffer handle could be used in except COIBufferCreateSubBuffer.
-/// Sub buffers out of Huge Page Buffer are also supported but the original
+/// Sub buffers out of Huge Page Buffer are also supported but the original
/// buffer needs to be a OPENCL buffer created with COI_OPTIMIZE_HUGE_PAGE_SIZE
/// flag.
///
@@ -1279,7 +1708,7 @@ COIBufferSetState(
/// @param out_pSubBuffer
/// [out] Pointer to a buffer handle that is filled in with the newly
/// created sub-buffer.
-///
+///
/// @return COI_SUCCESS if the sub-buffer was created
///
/// @return COI_INVALID_HANDLE if in_Buffer is not a valid buffer handle.
@@ -1302,6 +1731,79 @@ COIBufferCreateSubBuffer(
uint64_t in_Offset,
COIBUFFER* out_pSubBuffer);
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Releases the reference count on the specified buffer and process by
+/// in_ReleaseRefcnt. The returned result being COI_SUCCESS indicates that the
+/// specified process contains a reference to the specified buffer that has a
+/// refcnt that can be decremented. Otherwise, if the buffer or process
+/// specified do not exist, then COI_INVALID_HANDLE will be returned. If the
+/// process does not contain a reference to the specified buffer then
+/// COI_OUT_OF_RANGE will be returned.
+///
+///
+/// @param in_Process
+/// [in] The COI Process whose reference count for the specified buffer
+/// the user wants to decrement.
+///
+/// @param in_Buffer
+/// [in] The buffer used in the specified coi process in which the user
+/// wants to decrement the reference count.
+///
+/// @param in_ReleaseRefcnt
+/// [in] The value the reference count will be decremented by.
+///
+/// @return COI_SUCCESS if the reference count was successfully decremented.
+///
+/// @return COI_INVALID_HANDLE if in_Buffer or in_Process are invalid handles.
+///
+/// @return COI_OUT_OF_RANGE if the reference for the specified buffer or
+/// process does not exist.
+///
+
+COIACCESSAPI
+COIRESULT
+COIBufferReleaseRefcnt(
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t in_ReleaseRefcnt);
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Increments the reference count on the specified buffer and process by
+/// in_AddRefcnt. The returned result being COI_SUCCESS indicates that the
+/// specified process contains a reference to the specified buffer or a new
+/// reference has been created and that reference has a new refcnt. Otherwise,
+/// if the buffer or process specified do not exist, then COI_INVALID_HANDLE
+/// will be returned. If the input buffer is not valid on the target process
+/// then COI_NOT_INITIALIZED will be returned since the buffer is not current
+/// or allocated on the process.
+///
+/// @param in_Process
+/// [in] The COI Process whose reference count for the specified buffer
+/// the user wants to increment.
+///
+/// @param in_Buffer
+/// [in] The buffer used in the specified coi process in which the user
+/// wants to increment the reference count.
+///
+/// @param in_AddRefcnt
+/// [in] The value the reference count will be incremented by.
+///
+/// @return COI_SUCCESS if the reference count was successfully incremented.
+///
+/// @return COI_INVALID_HANDLE if in_Buffer or in_Process are invalid handles.
+///
+/// @return COI_NOT_INITIALIZED if in_Buffer does not have a buffer state of
+/// COI_BUFFER_VALID on the in_Process.
+///
+COIACCESSAPI
+COIRESULT
+COIBufferAddRefcnt(
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t in_AddRefcnt);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/liboffloadmic/include/coi/source/COIEngine_source.h b/liboffloadmic/include/coi/source/COIEngine_source.h
index 19a63e434094..a3b7799cfb56 100644
--- a/liboffloadmic/include/coi/source/COIEngine_source.h
+++ b/liboffloadmic/include/coi/source/COIEngine_source.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -75,7 +75,7 @@ typedef enum
///////////////////////////////////////////////////////////////////////////////
-/// This structure returns information about an Intel(r) Xeon Phi(tm)
+/// This structure returns information about an Intel(R) Xeon Phi(TM)
/// coprocessor.
/// A pointer to this structure is passed into the COIGetEngineInfo() function,
/// which fills in the data before returning to the caller.
@@ -101,6 +101,7 @@ typedef struct COI_ENGINE_INFO
uint32_t CoreMaxFrequency;
/// The load percentage for each of the hardware threads on the engine.
+ /// Currently this is limited to reporting out a maximum of 1024 HW threads.
uint32_t Load[COI_MAX_HW_THREADS];
/// The amount of physical memory managed by the OS.
@@ -133,9 +134,9 @@ typedef struct COI_ENGINE_INFO
///////////////////////////////////////////////////////////////////////////////
///
-/// Returns information related to a specified engine. Note that if Intel® Coprocessor Offload Infrastructure (Intel® COI) is
-/// unable to query a value it will be returned as zero but the call will
-/// still succeed.
+/// Returns information related to a specified engine. Note that if Intel(R)
+/// Coprocessor Offload Infrastructure (Intel(R) COI) is unable to query
+/// a value it will be returned as zero but the call will still succeed.
///
///
/// @param in_EngineHandle
@@ -173,14 +174,15 @@ COIEngineGetInfo(
///
/// Returns the number of engines in the system that match the provided ISA.
///
-/// Note that while it is possible to enumerate different types of Intel(r)
-/// Xeon Phi(tm) coprocessors on a single host this is not currently
-/// supported. Intel® Coprocessor Offload Infrastructure (Intel® COI) makes an assumption that all Intel(r) Xeon Phi(tm)
-/// coprocessors found in the system are the same architecture as the first
-/// coprocessor device.
+/// Note that while it is possible to enumerate different types of Intel(R)
+/// Xeon Phi(TM) coprocessors on a single host this is not currently
+/// supported. Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
+/// makes an assumption that all Intel(R) Xeon Phi(TM) coprocessors found
+/// in the system are the same architecture as the first coprocessor device.
///
-/// Also, note that this function returns the number of engines that Intel® Coprocessor Offload Infrastructure (Intel® COI)
-/// is able to detect. Not all of them may be online.
+/// Also, note that this function returns the number of engines that Intel(R)
+/// Coprocessor Offload Infrastructure (Intel(R) COI) is able to detect. Not
+/// all of them may be online.
///
/// @param in_ISA
/// [in] Specifies the ISA type of the engine requested.
@@ -211,7 +213,7 @@ COIEngineGetCount(
///
/// @param in_EngineIndex
/// [in] A unsigned integer which specifies the zero-based position of
-/// the engine in a collection of engines. The makeup of this
+/// the engine in a collection of engines. The makeup of this
/// collection is defined by the in_ISA parameter.
///
/// @param out_pEngineHandle
@@ -226,7 +228,8 @@ COIEngineGetCount(
///
/// @return COI_INVALID_POINTER if the out_pEngineHandle parameter is NULL.
///
-/// @return COI_VERSION_MISMATCH if the version of Intel® Coprocessor Offload Infrastructure (Intel® COI) on the host is not
+/// @return COI_VERSION_MISMATCH if the version of Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) on the host is not
/// compatible with the version on the device.
///
/// @return COI_NOT_INITIALIZED if the engine requested exists but is offline.
diff --git a/liboffloadmic/include/coi/source/COIEvent_source.h b/liboffloadmic/include/coi/source/COIEvent_source.h
index 99fa00d6a27d..ecb00fafe2d8 100644
--- a/liboffloadmic/include/coi/source/COIEvent_source.h
+++ b/liboffloadmic/include/coi/source/COIEvent_source.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -59,12 +59,10 @@ extern "C" {
///
/// Special case event values which can be passed in to APIs to specify
/// how the API should behave. In COIBuffer APIs passing in NULL for the
-/// completion event is the equivalent of passing COI_EVENT_SYNC. For
-/// COIPipelineRunFunction passing in NULL is the equivalent of
-/// COI_EVENT_ASYNC.
+/// completion event is the equivalent of passing COI_EVENT_SYNC.
/// Note that passing COI_EVENT_ASYNC can be used when the caller wishes the
/// operation to be performed asynchronously but does not care when the
-/// operation completes. This can be useful for opertions that by definition
+/// operation completes. This can be useful for operations that by definition
/// must complete in order (DMAs, run functions on a single pipeline). If
/// the caller does care when the operation completes then they should pass
/// in a valid completion event which they can later wait on.
@@ -72,6 +70,16 @@ extern "C" {
#define COI_EVENT_ASYNC ((COIEVENT*)1)
#define COI_EVENT_SYNC ((COIEVENT*)2)
+//////////////////////////////////////////////////////////////////////////////
+///
+/// This can be used to initialize a COIEVENT to a known invalid state.
+/// This is not required to use, but can be useful in some cases
+/// if a program is unsure if the event will be initialized by the runtime.
+/// Simply set the event to this value: COIEVENT event = COI_EVENT_INITIALIZER;
+///
+#define COI_EVENT_INITIALIZER { { 0, -1 } }
+
+
///////////////////////////////////////////////////////////////////////////////
///
/// Wait for an arbitrary number of COIEVENTs to be signaled as completed,
@@ -94,17 +102,17 @@ extern "C" {
/// and returns immediately, -1 blocks indefinitely.
///
/// @param in_WaitForAll
-/// [in] Boolean value specifying behavior. If true, wait for all
+/// [in] Boolean value specifying behavior. If true, wait for all
/// events to be signaled, or for timeout, whichever happens first.
/// If false, return when any event is signaled, or at timeout.
///
/// @param out_pNumSignaled
-/// [out] The number of events that were signaled. If in_NumEvents
+/// [out] The number of events that were signaled. If in_NumEvents
/// is 1 or in_WaitForAll = True, this parameter is optional.
///
/// @param out_pSignaledIndices
-/// [out] Pointer to an array of indicies into the original event
-/// array. Those denoted have been signaled. The user must provide an
+/// [out] Pointer to an array of indices into the original event
+/// array. Those denoted have been signaled. The user must provide an
/// array that is no smaller than the in_Events array. If in_NumEvents
/// is 1 or in_WaitForAll = True, this parameter is optional.
///
@@ -132,6 +140,10 @@ extern "C" {
/// @return COI_PROCESS_DIED if the remote process died. See COIProcessDestroy
/// for more details.
///
+/// @return COI_<REAL ERROR> if only a single event is passed in, and that event
+/// failed, COI will attempt to return the real error code that caused
+/// the original operation to fail, otherwise COI_PROCESS_DIED is reported.
+///
COIACCESSAPI
COIRESULT
COIEventWait(
@@ -183,6 +195,103 @@ COIRESULT
COIEventUnregisterUserEvent(
COIEVENT in_Event);
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// A callback that will be invoked to notify the user of an internal
+/// runtime event completion.
+///
+/// As with any callback mechanism it is up to the user to make sure that
+/// there are no possible deadlocks due to reentrancy (ie the callback being
+/// invoked in the same context that triggered the notification) and also
+/// that the callback does not slow down overall processing. If the user
+/// performs too much work within the callback it could delay further
+/// processing. The callback will be invoked prior to the signaling of
+/// the corresponding COIEvent. For example, if a user is waiting
+/// for a COIEvent associated with a run function completing they will
+/// receive the callback before the COIEvent is marked as signaled.
+///
+/// @param in_Event
+/// [in] The completion event that is associated with the
+/// operation that is being notified.
+///
+/// @param in_Result
+/// [in] The COIRESULT of the operation.
+///
+/// @param in_UserData
+/// [in] Opaque data that was provided when the callback was
+/// registered. Intel(R) Coprocessor Offload Infrastructure
+/// (Intel(R) COI) simply passes this back to the user so that
+/// they can interpret it as they choose.
+///
+typedef void (*COI_EVENT_CALLBACK)(
+ COIEVENT in_Event,
+ const COIRESULT in_Result,
+ const void* in_UserData);
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Registers any COIEVENT to receive a one time callback, when the event
+/// is marked complete in the offload runtime. If the event has completed
+/// before the COIEventRegisterCallback() is called then the callback will
+/// immediately be invoked by the calling thread. When the event is
+/// registered before the event completes, the runtime guarantees that
+/// the callback will be invoked before COIEventWait() is notified of
+/// the same event completing. In well written user code, this may provide
+/// a slight performance advantage.
+///
+/// Users should treat the callback much like an interrupt routine with regard
+/// to performance, specifically designing the callback to be as short and
+/// non-blocking as possible. Since the thread that runs the callback is
+/// non-deterministic, blocking or stalling of the callback may have severe
+/// performance impacts on the offload runtime. Thus, it is important to not
+/// create deadlocks between the callback and other signaling/waiting
+/// mechanisms. It is recommended to never invoke COIEventWait() inside
+/// a callback function, as this could lead to immediate deadlocks.
+///
+/// It is important to note that the runtime cannot distinguish between
+/// already triggered events and invalid events. Thus the user needs to pass
+/// in a valid event, or the callback will be invoked immediately.
+/// Failed events will still receive a callback and the user can query
+/// COIEventWait() after the callback for the failed return code.
+///
+/// If more than one callback is registered for the same event, only the
+/// single most current callback will be used, i.e. the older one will
+/// be replaced.
+///
+/// @param in_Event
+/// [in] A valid single event handle to be registered to receive a callback.
+///
+/// @param in_Callback
+/// [in] Pointer to a user function used to signal an
+/// event completion.
+///
+/// @param in_UserData
+/// [in] Opaque data to pass to the callback when it is invoked.
+///
+/// @param in_Flags
+/// [in] Reserved parameter for future expansion, required to be zero for now.
+///
+/// @return COI_INVALID_HANDLE if in_Event is not a valid COIEVENT
+///
+/// @return COI_INVALID_HANDLE if in_Callback is not a valid pointer.
+///
+/// @return COI_ARGUMENT_MISMATCH if the in_Flags is not zero.
+///
+/// @return COI_SUCCESS if an event is successfully registered
+///
+COIACCESSAPI
+COIRESULT
+COIEventRegisterCallback(
+ const COIEVENT in_Event,
+ COI_EVENT_CALLBACK in_Callback,
+ const void* in_UserData,
+ const uint64_t in_Flags);
+
+
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/liboffloadmic/include/coi/source/COIPipeline_source.h b/liboffloadmic/include/coi/source/COIPipeline_source.h
index d210a1f5025f..78c49ec2e099 100644
--- a/liboffloadmic/include/coi/source/COIPipeline_source.h
+++ b/liboffloadmic/include/coi/source/COIPipeline_source.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -59,12 +59,13 @@ extern "C" {
//////////////////////////////////////////////////////////////////////////////
-/// These flags specify how a buffer will be used within a run function. They
-/// allow Intel® Coprocessor Offload Infrastructure (Intel® COI) to make optimizations in how it moves data around the system.
+/// These flags specify how a buffer will be used within a run function. They
+/// allow the runtime to make optimizations in how it moves the data around.
/// These flags can affect the correctness of an application, so they must be
-/// set properly. For example, if a buffer is used in a run function with the
-/// COI_SINK_READ flag and then mapped on the source, Intel® Coprocessor Offload Infrastructure (Intel® COI) may use a previously
-/// cached version of the buffer instead of retrieving data from the sink.
+/// set properly. For example, if a buffer is used in a run function with the
+/// COI_SINK_READ flag and then mapped on the source, the runtime may use a
+/// previously cached version of the buffer instead of retrieving data from
+/// the sink.
typedef enum COI_ACCESS_FLAGS
{
/// Specifies that the run function will only read the associated buffer.
@@ -76,7 +77,23 @@ typedef enum COI_ACCESS_FLAGS
/// Specifies that the run function will overwrite the entire associated
/// buffer and therefore the buffer will not be synchronized with the
/// source before execution.
- COI_SINK_WRITE_ENTIRE
+ COI_SINK_WRITE_ENTIRE,
+
+ /// Specifies that the run function will only read the associated buffer
+ /// and will maintain the reference count on the buffer after
+ /// run function exit.
+ COI_SINK_READ_ADDREF,
+
+ /// Specifies that the run function will write to the associated buffer
+ /// and will maintain the reference count on the buffer after
+ /// run function exit.
+ COI_SINK_WRITE_ADDREF,
+
+ /// Specifies that the run function will overwrite the entire associated
+ /// buffer and therefore the buffer will not be synchronized with the
+ /// source before execution and will maintain the reference count on the
+ /// buffer after run function exit.
+ COI_SINK_WRITE_ENTIRE_ADDREF
} COI_ACCESS_FLAGS;
#define COI_PIPELINE_MAX_PIPELINES 512
@@ -86,7 +103,7 @@ typedef enum COI_ACCESS_FLAGS
///////////////////////////////////////////////////////////////////////////////
///
-/// Create a pipeline assoiated with a remote process. This pipeline can
+/// Create a pipeline associated with a remote process. This pipeline can
/// then be used to execute remote functions and to share data using
/// COIBuffers.
///
@@ -133,8 +150,8 @@ typedef enum COI_ACCESS_FLAGS
/// @return COI_TIME_OUT_REACHED if establishing the communication channel with
/// the remote pipeline timed out.
///
-/// @return COI_RETRY if the pipeline cannot be created due to the number of
-/// source-to-sink connections in use. A subsequent call to
+/// @return COI_RETRY if the pipeline cannot be created due to the number of
+/// source-to-sink connections in use. A subsequent call to
/// COIPipelineCreate may succeed if resources are freed up.
///
/// @return COI_PROCESS_DIED if in_Process died.
@@ -149,7 +166,7 @@ COIPipelineCreate(
///////////////////////////////////////////////////////////////////////////////
///
-/// Destroys the inidicated pipeline, releasing its resources.
+/// Destroys the indicated pipeline, releasing its resources.
///
/// @param in_Pipeline
/// [in] Pipeline to destroy.
@@ -175,22 +192,21 @@ COIPipelineDestroy(
///
/// 1. Proper care has to be taken while setting the input dependencies for
/// RunFunctions. Setting it incorrectly can lead to cyclic dependencies
-/// and can cause the respective pipeline (as a result Intel® Coprocessor Offload Infrastructure (Intel® COI) Runtime) to
-/// stall.
+/// and can cause the respective pipeline to stall.
/// 2. RunFunctions can also segfault if enough memory space is not available
/// on the sink for the buffers passed in. Pinned buffers and buffers that
/// are AddRef'd need to be accounted for available memory space. In other
/// words, this memory is not available for use until it is freed up.
-/// 3. Unexpected segmentation faults or erroneous behaviour can occur if
-/// handles or data passed in to Runfunction gets destroyed before the
+/// 3. Unexpected segmentation faults or erroneous behavior can occur if
+/// handles or data passed in to Runfunction gets destroyed before the
/// RunFunction finishes.
/// For example, if a variable passed in as Misc data or the buffer gets
-/// destroyed before the Intel® Coprocessor Offload Infrastructure (Intel® COI) runtime receives the completion notification
-/// of the Runfunction, it can cause unexpected behaviour. So it is always
+/// destroyed before the runtime receives the completion notification
+/// of the Runfunction, it can cause unexpected behavior. So it is always
/// recommended to wait for RunFunction completion event before any related
/// destroy event occurs.
///
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) Runtime expects users to handle such scenarios. COIPipelineRunFunction
+/// The runtime expects users to handle such scenarios. COIPipelineRunFunction
/// returns COI_SUCCESS for above cases because it was queued up successfully.
/// Also if you try to destroy a pipeline with a stalled function then the
/// destroy call will hang. COIPipelineDestroy waits until all the functions
@@ -240,7 +256,7 @@ COIPipelineDestroy(
/// [in] Pointer to user defined data, typically used to pass
/// parameters to Sink side functions. Should only be used for small
/// amounts data since the data will be placed directly in the
-/// Driver's command buffer. COIBuffers should be used to pass large
+/// Driver's command buffer. COIBuffers should be used to pass large
/// amounts of data.
///
/// @param in_MiscDataLen
@@ -250,8 +266,8 @@ COIPipelineDestroy(
///
/// @param out_pAsyncReturnValue
/// [out] Pointer to user-allocated memory where the return value from
-/// the run function will be placed. This memory should not be read
-/// until out_pCompletion has been signalled.
+/// the run function will be placed. This memory should not be read
+/// until out_pCompletion has been signaled.
///
/// @param in_AsyncReturnValueLen
/// [in] Size of the out_pAsyncReturnValue in bytes.
@@ -259,11 +275,14 @@ COIPipelineDestroy(
/// @param out_pCompletion
/// [out] An optional pointer to a COIEVENT object
/// that will be signaled when this run function has completed
-/// execution. The user may pass in NULL if they do not wish to signal
-/// any COIEVENTs when this run function completes.
+/// execution. The user may pass in NULL if they wish for this function
+/// to be synchronous, otherwise if a COIEVENT object is passed in the
+/// function is then asynchronous and returns after enqueuing the
+/// RunFunction and passes back the COIEVENT that will be signaled
+/// once the RunFunction has completed.
///
/// @return COI_SUCCESS if the function was successfully placed in a
-/// pipeline for future execution. Note that the actual
+/// pipeline for future execution. Note that the actual
/// execution of the function will occur in the future.
///
/// @return COI_OUT_OF_RANGE if in_NumBuffers is greater than
@@ -303,18 +322,10 @@ COIPipelineDestroy(
/// @return COI_ARGUMENT_MISMATCH if in_pReturnValue is non-NULL but
/// in_ReturnValueLen is zero.
///
-/// @return COI_ARGUMENT_MISMATCH if a COI_BUFFER_STREAMING_TO_SOURCE buffer
-/// is not passed with COI_SINK_WRITE_ENTIRE access flag.
-///
-/// @return COI_RESOURCE_EXHAUSTED if could not create a version for TO_SOURCE
-/// streaming buffer. It can fail if enough memory is not available to
-/// register. This call will succeed eventually when the registered
-/// memory becomes available.
-///
/// @return COI_RETRY if any input buffers, which are not pinned buffers,
/// are still mapped when passed to the run function.
///
-/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
+/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
/// associated with the pipeline that was passed in.
///
/// @return COI_OUT_OF_RANGE if any of the access flags in
diff --git a/liboffloadmic/include/coi/source/COIProcess_source.h b/liboffloadmic/include/coi/source/COIProcess_source.h
index b60e55225157..8cc6ffc3bf83 100644
--- a/liboffloadmic/include/coi/source/COIProcess_source.h
+++ b/liboffloadmic/include/coi/source/COIProcess_source.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -61,11 +61,17 @@ extern "C" {
/// This is a special COIPROCESS handle that can be used to indicate that
/// the source process should be used for an operation.
///
-#define COI_PROCESS_SOURCE ((COIPROCESS)-1)
+#define COI_PROCESS_SOURCE ((COIPROCESS)-1)
#define COI_MAX_FILE_NAME_LENGTH 256
///////////////////////////////////////////////////////////////////////////////
+/// This is a flag for COIProcessCreateFromMemory that indicates the passed in
+/// memory pointer is a fat binary file and should not have regular validation.
+///
+#define COI_FAT_BINARY ((uint64_t)-1)
+
+///////////////////////////////////////////////////////////////////////////////
///
/// Create a remote process on the Sink and start executing its main()
/// function.
@@ -74,14 +80,14 @@ extern "C" {
///
/// @param in_Engine
/// [in] A handle retrieved via a call to COIEngineGetHandle() that
-/// indicates which device to create the process on. This is
+/// indicates which device to create the process on. This is
/// necessary because there can be more than one device
/// within the system.
///
/// @param in_pBinaryName
/// [in] Pointer to a null-terminated string that contains the
/// path to the program binary to be instantiated as a process on
-/// the sink device. The file name will be accessed via
+/// the sink device. The file name will be accessed via
/// fopen and fread, as such, the passed in binary name must
/// be locatable via these commands. Also, the file name (without
/// directory information) will be used automatically by the system
@@ -121,8 +127,8 @@ extern "C" {
/// @param in_InitialBufferSpace
/// [in] The initial memory (in bytes) that will be pre-allocated at
/// process creation for use by buffers associated with this remote
-/// process. In addition to allocating, Intel® Coprocessor Offload
-/// Infrastructure (Intel® COI) will also fault in the
+/// process. In addition to allocating, Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) will also fault in the
/// memory during process creation. If the total size of the buffers
/// in use by this process exceed this initial size, memory on the
/// sink may continue to be allocated on demand, as needed, subject
@@ -186,7 +192,7 @@ COIProcessCreateFromFile(
///
/// @param in_Engine
/// [in] A handle retrieved via a call to COIEngineGetHandle() that
-/// indicates which device to create the process on. This is
+/// indicates which device to create the process on. This is
/// necessary because there can be more than one device
/// within the system.
///
@@ -236,8 +242,8 @@ COIProcessCreateFromFile(
/// @param in_InitialBufferSpace
/// [in] The initial memory (in bytes) that will be pre-allocated at
/// process creation for use by buffers associated with this remote
-/// process. In addition to allocating, Intel® Coprocessor
-/// Offload Infrastructure (Intel® COI) will also fault in the
+/// process. In addition to allocating, Intel(R) Coprocessor
+/// Offload Infrastructure (Intel(R) COI) will also fault in the
/// memory during process creation. If the total size of the buffers
/// in use by this process exceed this initial size, memory on the
/// sink may continue to be allocated on demand, as needed, subject
@@ -314,8 +320,8 @@ COIProcessCreateFromFile(
/// @return COI_PROCESS_DIED if at some point during the loading of the remote
/// process the remote process terminated abnormally.
///
-/// @return COI_VERSION_MISMATCH if the version of Intel® Coprocessor
-/// Offload Infrastructure (Intel® COI) on the host is not
+/// @return COI_VERSION_MISMATCH if the version of Intel(R) Coprocessor
+/// Offload Infrastructure (Intel(R) COI) on the host is not
/// compatible with the version on the device.
///
COIACCESSAPI
@@ -354,7 +360,7 @@ COIProcessCreateFromMemory(
/// [in] If this flag is set to true, then the sink process will be
/// forcibly terminated after the timeout has been reached. A timeout
/// value of 0 will kill the process immediately, while a timeout of
-/// -1 is invalid. If the flag is set to false then a message will
+/// -1 is invalid. If the flag is set to false then a message will
/// be sent to the sink process requesting a clean shutdown. A value
/// of false along with a timeout of 0 does not send a shutdown
/// message, instead simply polls the process to see if it is alive.
@@ -374,8 +380,8 @@ COIProcessCreateFromMemory(
/// be 0 if the remote process exited cleanly. If the remote process
/// exited abnormally this will contain the termination code given
/// by the operating system of the remote process. This is an optional
-/// parameter and the caller may pass in NULL if they are not
-/// interested in the termination code. The output value of this
+/// parameter and the caller may pass in NULL if they are not
+/// interested in the termination code. The output value of this
/// pointer is only meaningful if COI_SUCCESS is returned.
///
/// @return COI_SUCCESS if the process was destroyed.
@@ -390,8 +396,8 @@ COIProcessCreateFromMemory(
///
/// @return COI_TIME_OUT_REACHED if the sink process is still running after
/// waiting in_WaitForMainTimeout milliseconds and in_ForceDestroy
-/// is false. This is true even if in_WaitForMainTimeout was 0.
-/// In this case, out_pProcessReturn and out_pTerminationCode
+/// is false. This is true even if in_WaitForMainTimeout was 0.
+/// In this case, out_pProcessReturn and out_pTerminationCode
/// are undefined.
///
COIACCESSAPI
@@ -410,10 +416,10 @@ COIProcessDestroy(
///
/// Given a loaded native process, gets an array of function handles that can
/// be used to schedule run functions on a pipeline associated with that
-/// process. See the documentation for COIPipelineRunFunction() for
-/// additional information. All functions that are to be retrieved in this
-/// fashion must have the define COINATIVEPROCESSEXPORT preceeding their type
-/// specification. For functions that are written in C++, either the entries
+/// process. See the documentation for COIPipelineRunFunction() for
+/// additional information. All functions that are to be retrieved in this
+/// fashion must have the define COINATIVEPROCESSEXPORT preceding their type
+/// specification. For functions that are written in C++, either the entries
/// in in_pFunctionNameArray in must be pre-mangled, or the functions must be
/// declared as extern "C". It is also necessary to link the binary containing
/// the exported functions with the -rdynamic linker flag.
@@ -432,7 +438,7 @@ COIProcessDestroy(
/// @param in_ppFunctionNameArray
/// [in] Pointer to an array of null-terminated strings that match
/// the name of functions present in the code of the binary
-/// previously loaded via COIProcessCreate(). Note that if a C++
+/// previously loaded via COIProcessCreate(). Note that if a C++
/// function is used, then the string passed in must already be
/// properly name-mangled, or extern "C" must be used for where
/// the function is declared.
@@ -462,7 +468,7 @@ COIProcessDestroy(
/// the null.
///
/// @warning This operation can take several milliseconds so it is recommended
-/// that it only be be done at load time.
+/// that it only be done at load time.
///
COIACCESSAPI
COIRESULT
@@ -486,7 +492,7 @@ COIProcessGetFunctionHandles(
#define COI_LOADLIBRARY_DEEPBIND 0x00008
#define COI_LOADLIBRARY_NODELETE 0x01000
-/// Flags to replicate the behavior of the original version of
+/// Flags to replicate the behaviour of the original version of
/// COIProcessLoadLibrary* APIs.
#define COI_LOADLIBRARY_V1_FLAGS (COI_LOADLIBRARY_GLOBAL|COI_LOADLIBRARY_NOW)
@@ -796,13 +802,13 @@ COIProcessRegisterLibraries(
//////////////////////////////////////////////////////////////////////////////
/// The user can choose to have notifications for these internal events
/// so that they can build their own profiling and performance layer on
-/// top of Intel® Coprocessor Offload Infrastructure (Intel® COI) .
+/// top of Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI).
///
typedef enum COI_NOTIFICATIONS
{
/// This event occurs when all explicit and implicit dependencies are
- /// satisified and Intel® Coprocessor Offload Infrastructure
- /// (Intel® COI) schedules the run function to begin execution.
+ /// satisfied and Intel(R) Coprocessor Offload Infrastructure
+ /// (Intel(R) COI) schedules the run function to begin execution.
RUN_FUNCTION_READY = 0,
/// This event occurs just before the run function actually starts
@@ -835,20 +841,17 @@ typedef enum COI_NOTIFICATIONS
//////////////////////////////////////////////////////////////////////////////
///
/// A callback that will be invoked to notify the user of an internal
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// event. Note that the callback is registered per process so any of the
/// above notifications that happen on the registered process will receive
/// the callback.
/// As with any callback mechanism it is up to the user to make sure that
-/// there are no possible deadlocks due to reentrancy (ie the callback being
+/// there are no possible deadlocks due to reentrancy (i.e. the callback being
/// invoked in the same context that triggered the notification) and also
/// that the callback does not slow down overall processing. If the user
/// performs too much work within the callback it could delay further
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
-/// processing.
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI)
-/// promises to invoke the callback for an internal event prior to
-/// signaling the corresponding COIEvent. For example, if a user is waiting
+/// processing. The callback will be invoked prior to the signaling of
+/// the corresponding COIEvent. For example, if a user is waiting
/// for a COIEvent associated with a run function completing they will
/// receive the callback before the COIEvent is marked as signaled.
///
@@ -865,11 +868,12 @@ typedef enum COI_NOTIFICATIONS
///
/// @param in_UserData
/// [in] Opaque data that was provided when the callback was
-/// registered. Intel® Coprocessor Offload Infrastructure (Intel® COI) simply passes this back to the user so that
+/// registered. Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
+/// simply passes this back to the user so that
/// they can interpret it as they choose.
///
typedef void (*COI_NOTIFICATION_CALLBACK)(
- COI_NOTIFICATIONS in_Type,
+ COI_NOTIFICATIONS in_Type,
COIPROCESS in_Process,
COIEVENT in_Event,
const void* in_UserData);
@@ -878,7 +882,7 @@ typedef void (*COI_NOTIFICATION_CALLBACK)(
//////////////////////////////////////////////////////////////////////////////
///
/// Register a callback to be invoked to notify that an internal
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) event
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI) event
/// has occured on the process that is associated with the callback.
/// Note that it is legal to have more than one callback registered with
/// a given process but those must all be unique callback pointers.
@@ -942,13 +946,13 @@ COIRESULT COIUnregisterNotificationCallback(
///
/// Set the user data that will be returned in the notification callback.
/// This data is sticky and per thread so must be set prior to the
-/// Intel® Coprocessor Offload Infrastructure (Intel® COI) //
+/// Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
/// operation being invoked. If you wish to set the context to be returned
/// for a specific instance of a user event notification then the context
/// must be set using this API prior to registering that user event with
/// COIEventRegisterUserEvent.
-/// The value may be set prior to each Intel® Coprocessor Offload
-/// Infrastructure (Intel® COI) operation being called to
+/// The value may be set prior to each Intel(R) Coprocessor Offload
+/// Infrastructure (Intel(R) COI) operation being called to
/// effectively have a unique UserData per callback.
/// Setting this value overrides any value that was set when the
/// callback was registered and will also override any future registrations
@@ -962,6 +966,266 @@ COIACCESSAPI
void COINotificationCallbackSetContext(
const void* in_UserData);
+
+/// @name COIProcessSetCacheSize flags.
+/// Flags are divided into two categories: _MODE_ and _ACTION_
+/// only one of each is valid with each call.
+/// _ACTIONS_ and _MODES_ should be bitwise OR'ed together, i.e. |
+//@{
+
+/// Current set of DEFINED bits for _MODE_, can be used
+/// to clear or check fields, not useful to pass into APIs. Used internally.
+#define COI_CACHE_MODE_MASK 0x00000007
+
+/// Flag to indicate to keep the previous mode of operation. By default
+/// this would be COI_CACHE_MODE_ONDEMAND_SYNC. As of this release,
+/// this is the only mode available. This mode is valid with _ACTION_
+/// flags.
+#define COI_CACHE_MODE_NOCHANGE 0x00000001
+
+/// Mode of operation that indicates that COI will allocate physical
+/// cache memory exactly when it is needed. COIPipeline execution in
+/// the given process will momentarily block until the allocation request
+/// is completed. This is and has been the default mode.
+#define COI_CACHE_MODE_ONDEMAND_SYNC 0x00000002
+
+/// Not yet implemented. Future mode that will not stall a COIPipeline
+/// but prefer eviction/paging if possible so as to immediately execute the pipeline.
+/// At the same time, enqueue background requests to allocate extra cache
+/// so as to provide optimized behavior on subsequent runs.
+#define COI_CACHE_MODE_ONDEMAND_ASYNC 0x00000004
+
+
+/// Current set of DEFINED bits for _ACTION_ can be used
+/// to clear fields, but not useful to pass into APIs. Used internally.
+#define COI_CACHE_ACTION_MASK 0x00070000
+
+/// No action requested. With this flag specified
+/// it is recommended to NOT provide an out_pCompletion event,
+/// as with this flag, modes and values are immediately set.
+/// This is valid with _MODE_ flags.
+#define COI_CACHE_ACTION_NONE 0x00010000
+
+/// This _ACTION_ flag will immediately attempt to increase the cache
+/// physical memory size to the current set pool size(s). Used to
+/// pre-allocate memory on remote processes, so that runfunction will
+/// enqueue faster. Also may prevent unused buffer eviction from process
+/// reducing overhead in trade for memory allocation cost.
+#define COI_CACHE_ACTION_GROW_NOW 0x00020000
+
+/// Not yet implemented. Future _ACTION_ that will attempt to find unused
+/// allocated cache and free it, with the express goal of reducing the
+/// footprint on the remote process down to the value of the currently set
+/// pool size(s).
+#define COI_CACHE_ACTION_FREE_UNUSED 0x00040000
+
+//@}
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Set the minimum preferred COIProcess cache size. By default these values
+/// are set to 1GB. With the default size of 1GB, Intel(R) COI will only
+/// grow the cache with each new buffer up until the set limit is consumed,
+/// after which it grows only as required to accommodate additional buffers.
+/// This means that after the cache preference is met, a process will act
+/// as conservative as possible for memory consumption.
+/// This API will allow users to adjust memory consumption aggressiveness.
+///
+/// Additional performance may be gained if the user sets a value higher than
+/// default. With high memory consumption user can choose to trade performance
+/// between memory allocation cost and transfer speeds to and from the
+/// remote process. A last consideration is that if buffers are used only
+/// once, it may be best to keep a small cache size, or ensure buffers are
+/// fully destroyed after their use.
+///
+/// Adjusting this value too high may result in out of resource conditions.
+///
+/// @param in_Process
+/// [in] Handle to uniquely identify the process for which the cache
+/// is to be adjusted.
+///
+/// @param in_HugePagePoolSize
+/// [in] The suggested size of the remote huge page cache in bytes.
+/// This value defaults to 1GB. A process will only allocate cache
+/// memory if the current cache is smaller than this limit, or it is
+/// absolutely necessary to fulfill a request, but preferring to
+/// re-use existing memory and paging unused buffers back to the host.
+/// Increasing this value will cause a process to
+/// aggressively allocate memory on demand up to this value, before
+/// evicting/paging memory from the remote process back to the host
+/// process.
+///
+/// The net result is that memory consumption is increased, but the
+/// user can 'cache' more buffers on the remote process. More time
+/// may be spent during first use of run functions as more memory
+/// may be allocated, but subsequent run functions will likely
+/// see an increase in queueing performance as the data is already
+/// valid in the remote process.
+///
+/// Users should tune this value for optimum performance balanced
+/// against memory consumption. This value does not affect 4K page
+/// cache. Please use in_SmallPagePoolSize for 4K pages.
+///
+/// @param in_HugeFlags
+/// [in] Flags to select mode or action for huge page cache. One _MODE_
+/// and one _ACTION_ flag are specified together. Default _MODE_ is
+/// COI_CACHE_MODE_ONDEMAND_SYNC. See all COI_CACHE_MODE_* and
+/// COI_CACHE_ACTION_* for other modes and actions. Default _ACTION_
+/// is COI_CACHE_ACTION_NONE.
+///
+/// @param in_SmallPagePoolSize
+/// [in] The suggested size of the remote 4K cache in bytes. Same
+/// function as in_HugePagePoolSize but affecting only 4K page cache.
+/// Defaults to 1GB.
+///
+/// @param in_SmallFlags
+/// [in] Flags to select mode or action for 4K page cache. One _MODE_
+/// and one _ACTION_ flag are specified together. Default _MODE_ is
+/// COI_CACHE_MODE_ONDEMAND_SYNC. See all COI_CACHE_MODE_* and
+/// COI_CACHE_ACTION_* for other modes and actions.
+///
+/// @param in_NumDependencies
+/// [in] The number of dependencies specified in the in_pDependencies
+/// array. This may be 0 if the caller does not want the call to
+/// wait for any events to be signaled.
+///
+/// @param in_pDependencies
+/// [in] An optional array of handles to previously created COIEVENT
+/// objects that this operation will wait for before starting.
+/// This allows the user to create dependencies between asynchronous
+/// calls and other operations such as run functions. The user may
+/// pass in NULL if they do not wish to wait for any dependencies.
+/// Only useful with _ACTION_ flags, otherwise there is no action
+/// to wait on. All _MODE_ changes happen immediately.
+///
+/// @param out_pCompletion
+/// [out] An optional pointer to a COIEVENT object that will be
+/// signaled when the operation is complete. The user may pass in
+/// NULL if the user wants the operation to block until completed.
+/// Note: This parameter is not useful unless paired with a
+/// valid _ACTION_ flag.
+///
+/// @return COI_SUCCESS if the cache was successfully adjusted. In case of
+/// valid flags including _ACTION_, if out_pCompletion was specified,
+/// this does not indicate the operation succeeded, but rather only
+/// it was successfully queued. For further information see
+/// COIEventWait() for getting return values.
+///
+/// @return COI_INVALID_HANDLE if the in_Process handle passed in was invalid.
+///
+/// @return COI_RESOURCE_EXHAUSTED if no more cache can be created,
+/// possibly, but not necessarily because a pool size was set too large
+/// and COI_CACHE_ACTION_GROW_NOW was specified.
+///
+/// @return COI_NOT_SUPPORTED if more than one _MODE_ or _ACTION_ was
+/// specified.
+///
+/// @return COI_NOT_SUPPORTED if an invalid _MODE_ or _ACTION_ was
+/// specified.
+///
+/// @return COI_ARGUMENT_MISMATCH if in_NumDependencies is non-zero while
+/// in_pDependencies was passed in as NULL.
+///
+/// @return COI_OUT_OF_RANGE if one of the pool sizes was invalid.
+///
+/// @return COI_PROCESS_DIED if at some point during the mode or action the
+/// remote process terminated abnormally. Possibly due to an out of
+/// memory condition.
+///
+COIACCESSAPI
+COIRESULT COIProcessSetCacheSize(
+ const COIPROCESS in_Process,
+ const uint64_t in_HugePagePoolSize,
+ const uint32_t in_HugeFlags,
+ const uint64_t in_SmallPagePoolSize,
+ const uint32_t in_SmallFlags,
+ uint32_t in_NumDependencies,
+ const COIEVENT* in_pDependencies,
+ COIEVENT* out_pCompletion);
+
+
+//////////////////////////////////////////////////////////////////////////////
+/// These are the different modes of operation that can be selected for
+/// the COI_DMA_MODE by the API COIProcessConfigureDMA. They allow the user
+/// to customize the DMA layer behaviour.
+///
+typedef enum COI_DMA_MODE
+{
+ /// This mode will use one common logical channel for all DMA operations.
+ /// Using this mode requires a channel count of one.
+ COI_DMA_MODE_SINGLE = 0,
+
+ /// This mode will dedicate one logical channel for write operations
+ /// and one logical channel for read operations. Requires a minimum of
+ /// two logical channels, if more than two are used they are ignored
+ /// in the current implementation.
+ COI_DMA_MODE_READ_WRITE,
+
+ /// This mode is not yet implemented and is a placeholder for future
+ /// releases. Check here for updates when it is implemented.
+ /// Will require a minimum of two logical channels and a maximum
+ /// of four channels.
+ COI_DMA_MODE_ROUND_ROBIN,
+
+ /// Reserved for internal use.
+ COI_DMA_RESERVED
+} COI_DMA_MODE;
+
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Set the number and mode of the physical DMA channels that each COIProcess
+/// will establish during COIProcess creation.
+///
+/// By default the runtime will operate in COI_DMA_MODE_SINGLE mode.
+/// This API is intended to be called before COIProcessCreateFromFile() or
+/// COIProcessCreateFromMemory(). The values are stored globally and will
+/// be used by the creation APIs. It is possible to call this API once
+/// before each new COIPROCESS is created and thus have each COIPROCESS
+/// run in different modes. It is not possible to change the mode on an
+/// existing COIPROCESS.
+///
+/// The larger number of logical connections requested will impose a
+/// performance penalty on the COIBUFFER creation APIs, but unlock better
+/// parallelism for DMA transfers during runtime.
+///
+/// A maximum value of four (4) channels is available today, but current
+/// implementation will only take advantage of two DMA channels. The option
+/// is left available for programmers to use in case future implementations
+/// provide performance advantages.
+///
+/// It is important to note that, for some operations, enabling this
+/// option may increase parallelism and require the user to enforce
+/// explicit dependencies for operations on the same buffers. See documentation
+/// for COIBufferRead/Write/Copy operations for more details.
+///
+/// @param in_Channels
+/// [in] Number of logical connections to the remote COIProcess that
+/// the runtime will establish and use for DMA transfer requests.
+/// Will be ignored if in_Mode is set to COI_DMA_MODE_SINGLE.
+///
+/// @param in_Mode
+/// [in] The mode of operation in which the runtime will use the
+/// logical connections to the remote COIProcess.
+///
+/// @return COI_SUCCESS if the mode and number of DMA channels requested
+/// is valid. The actual creation of channels and modes is
+/// done during COIProcessCreateFromFile() and
+/// COIProcessCreateFromMemory().
+///
+/// @return COI_NOT_SUPPORTED if an invalid value for in_Channels or
+/// in_Mode was requested.
+///
+/// @return COI_ARGUMENT_MISMATCH if an invalid combination of in_Channels and
+/// in_Mode was requested. Example could be 2 channels with
+/// COI_DMA_MODE_SINGLE, or 1 channel with COI_DMA_MODE_READ_WRITE.
+///
+COIACCESSAPI
+COIRESULT COIProcessConfigureDMA(
+ const uint64_t in_Channels,
+ const COI_DMA_MODE in_Mode);
+
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/liboffloadmic/include/myo/myo.h b/liboffloadmic/include/myo/myo.h
index c6e5f56ede21..f6f14015c258 100644
--- a/liboffloadmic/include/myo/myo.h
+++ b/liboffloadmic/include/myo/myo.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
diff --git a/liboffloadmic/include/myo/myoimpl.h b/liboffloadmic/include/myo/myoimpl.h
index d998ef3feb60..c5a1a41935ed 100644
--- a/liboffloadmic/include/myo/myoimpl.h
+++ b/liboffloadmic/include/myo/myoimpl.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -459,11 +459,37 @@ extern MyoError myoiTargetSharedMallocTableRegister(
* return -1;
* }
* @endcode
- * This intialization is required only in the client/host side
- * of the application. The server/card side executable should be
- * executed only on the second card in this case.
+ * This initialization is required only in the client/host side
+ * of the application. The server/card side executable should be
+ * executed only on the second card in this case.
+ *
+ * Another capability for the MyoiUserParams structure in MYO is specifying
+ * a remote procedure call to be executed on the host or card, immediately after
+ * myoiLibInit() completes. This capability is useful because some calls in
+ * MYO return immediately, but do not actually complete until after the MYO
+ * library is completely initialized on all peers. An example follows,
+ * showing how to cause MYO to execute the registered function named
+ * "PostMyoLibInitFunction" on the first card only:
+ * @code
+ * MyoiUserParams UserParas[64];
+ * UserParas[0].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
+ * UserParas[0].nodeid = 1;
+ * SetPostLibInitFuncName(UserParas[1], "PostMyoLibInitFunction");
+ * UserParas[2].type = MYOI_USERPARAMS_LAST_MSG;
+ * if(MYO_SUCCESS != myoiLibInit(&UserParas, (void*)&myoiUserInit)) {
+ * printf("Failed to initialize MYO runtime\n");
+ * return -1;
+ * }
+ * @endcode
+ *
+ * Note, to cause PostMyoLibInitFunction to be executed on ALL cards,
+ * specify: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES for the nodeid.
+ * That is:
+ * @code
+ * UserParas[0].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
+ * @endcode
*
- * @param userInitFunc Shared variables and remote funtions are
+ * @param userInitFunc Shared variables and remote functions are
* registered in this routine, which is called by the runtime during
* library initialization.
* @return
@@ -473,6 +499,22 @@ extern MyoError myoiTargetSharedMallocTableRegister(
MYOACCESSAPI
MyoError myoiLibInit(void * in_args, void *userInitFunc /*userInitFunc must be: MyoError (*userInitFunc)(void) */);
+/** @fn extern MyoError myoiSupportsFeature(MyoFeatureType myoFeature)
+ * @brief Supports runtime query to determine whether a feature is supported
+ * by the myo that is installed on the system. This function is intended to
+ * support client code to query the myo library to determine whether its set
+ * of capabilities are able to support the client's needs.
+ *
+ * @param myoFeature The feature that is to be inquired about.
+ * @return
+ * MYO_SUCCESS; if the feature is supported.
+ * MYO_FEATURE_NOT_IMPLEMENTED if the feature is not supported.
+ *
+ * (For more information, please also see the declaration of the MyoFeatureType enum.)
+ **/
+MYOACCESSAPI
+MyoError myoiSupportsFeature(MyoFeatureType myoFeature);
+
/** @fn void myoiLibFini()
* @brief Finalize the MYO library, all resources held by the runtime are
* released by this routine.
@@ -519,17 +561,56 @@ MyoError myoiSetMemConsistent(void *in_pAddr, size_t in_Size);
EXTERN_C MYOACCESSAPI unsigned int myoiMyId; /* MYO_MYID if on accelerators */
EXTERN_C MYOACCESSAPI volatile int myoiInitFlag;
-
- //! Structure of the array element that is passed to myoiLibInit() to initialize a subset of the available cards.
-typedef struct{
- //!type = MYOI_USERPARAMS_DEVID for each element in the array except the last element ; type = MYOI_USERPARAMS_LAST_MSG for the last element in the array.
+ //! Structure of the array element that is passed to myoiLibInit() to initialize a subset of the available cards, or
+ //! to specify a remote call function to be called after successful myo library initialization:
+typedef struct {
+ //!type = MYOI_USERPARAMS_DEVID or MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC for each element in the array except
+ //!the last element, whose type should be: MYOI_USERPARAMS_LAST_MSG.
int type;
- //!nodeid refers to the card index.
+ //! nodeid refers to the 'one-based' card index (1 represents the first card, mic0, 2 represents the
+ //! second card, mic1, 3 represents the third card, mic2, ....).
+ //! NOTE: for type == MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, specifying MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES
+ //! for nodeid will execute the named function on each card in the system, mic0, mic1, mic2, .... micn.
int nodeid;
-}MyoiUserParams;
-
-#define MYOI_USERPARAMS_DEVID 1
-#define MYOI_USERPARAMS_LAST_MSG -1
+} MyoiUserParams;
+
+//!The following two types are dealt with entirely with just one MyoiUserParams structure:
+//!MYOI_USERPARAMS_DEVID maps node ids.
+#define MYOI_USERPARAMS_DEVID 1
+//!MYOI_USERPARAMS_LAST_MSG terminates the array of MyoiUserParams.
+#define MYOI_USERPARAMS_LAST_MSG -1
+
+//!The following type requires setting the node id in a MyoiUserParams structure, and then following the struct
+//!with a MyoiUserParamsPostLibInit union:
+#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC 2
+//!nodeid can be one of the following macros, or a number >=1, corresponding to the card number (1 == mic0,
+//!2 == mic1, 3 == mic2, ....)
+//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES causes the function to be called on all
+//!cards:
+#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES 0
+//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE causes the function to be called on the
+//!host instead of the card:
+#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE -1
+
+//!The postLibInit union contains two members that serve two different purposes:
+//!1. It can be used to stipulate the name of the function to be remotely called from host to card, on successful
+//!myo library initialization, (member postLibInitRemoveFuncName) using the type:
+//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC. OR
+//!2. It can be an actual function pointer (member name: postLibInitHostFuncAddress) that will be called on the host,
+//!on successful myo library initialization, using the type: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, with nodeid:
+//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE
+typedef union {
+ const char *postLibInitRemoveFuncName;
+ void (*postLibInitHostFuncAddress)(void);
+} MyoiUserParamsPostLibInit;
+
+/* These are two macros to help get the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
+#define GetPostLibInitFuncName(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitRemoveFuncName
+#define GetPostLibInitFuncAddr(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitHostFuncAddress
+
+/* These are two macros to help set the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
+#define SetPostLibInitFuncName(USERPARAMS,FUNC_NAME) GetPostLibInitFuncName(USERPARAMS) = FUNC_NAME
+#define SetPostLibInitFuncAddr(USERPARAMS,FUNC_ADDR) GetPostLibInitFuncAddr(USERPARAMS) = FUNC_ADDR
#ifdef __cplusplus
}
diff --git a/liboffloadmic/include/myo/myotypes.h b/liboffloadmic/include/myo/myotypes.h
index 81464d1666f0..596ad05280cb 100644
--- a/liboffloadmic/include/myo/myotypes.h
+++ b/liboffloadmic/include/myo/myotypes.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -74,7 +74,8 @@ typedef enum {
MYO_ALREADY_EXISTS, /*!< Already Exists */
- MYO_EOF, /*!< EOF */
+ MYO_EOF, /*!< EOF */
+ MYO_FEATURE_NOT_IMPLEMENTED = -1, /*!< Feature not implemented (see myoiSupportsFeature()). */
} MyoError;
@@ -84,6 +85,40 @@ typedef enum {
MYO_ARENA_OURS, /*!< Arena OURS Ownership */
} MyoOwnershipType;
+ /*! MYO Features */
+typedef enum {
+ /*!< EVERY VALUE that is less than MYO_FEATURE_BEGIN is not implemented. */
+ MYO_FEATURE_BEGIN = 1, /*!< The first feature that is supported. */
+ MYO_FEATURE_POST_LIB_INIT = MYO_FEATURE_BEGIN, /*!< Allows specifying a function to be executed immediately */
+ /* after myoiLibInit() completes. This feature was implemented in version */
+ /* 3.3 of MPSS. */
+ /* MYO_FEATURE_FUTURE_CAPABILITY = 2, at some time in the future, as new features are added to MYO, new enumeration constants */
+ /* will be added to the MyoFeatureType, and the value of the new enumeration constant will be greater */
+ /* than the current value of MYO_FEATURE_LAST constant, and then the MYO_FEATURE_LAST constant too, */
+ /* will be changed to be the value of the new enumeration constant. For example, in April, 2014, */
+ /* the POST_LIB_INIT feature was implemented in version 3.3 of MPSS, and the MYO_FEATURE_BEGIN */
+ /* enumeration constant is the same as the MYO_FEATURE_LAST enumeration constant, and both are equal */
+ /* to 1. */
+ /* Suppose in December, 2014, a new feature is added to the MYO library, for version 3.4 of MPSS. */
+ /* Then, MYO_FEATURE_BEGIN enumeration constant will be still the value 1, but the MYO_FEATURE_LAST */
+ /* enumeration constant will be set to 2. */
+ /* At runtime, one client binary can determine if the MYO that is installed is capable of any */
+ /* capability. For example, suppose a future client binary queries version 3.3 of MYO if it is */
+ /* capable of some future feature. Version 3.3 of MYO will indicate that the feature is not */
+ /* implemented to the client. But, conversely, suppose the future client queries version 3.4 of MYO */
+ /* if it is capable of some future feature. Version 3.4 of MYO will indicate that the feature is  */
+ /* supported. */
+ /* */
+ /* Date: | MYO_FEATURE_BEGIN: | MYO_FEATURE_LAST: | MPSS VERSION: | myoiSupportsFeature(MYO_FEATURE_FUTURE_CAPABILITY) */
+ /* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
+ /* April, 2014 | 1 | 1 | 3.3 | MYO_FEATURE_NOT_IMPLEMENTED */
+ /* December, 2014 | 1 | 2 | 3.4 | MYO_SUCCESS */
+ /* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
+ MYO_FEATURE_LAST = MYO_FEATURE_POST_LIB_INIT, /*!< The last feature that is supported. */
+ /*!< EVERY VALUE that is greater than MYO_FEATURE_LAST is not implemented. */
+ /*!< EVERY VALUE that is greater than or equal to MYO_FEATURE_BEGIN AND less than or equal to MYO_FEATURE_LAST is implemented. */
+} MyoFeatureType; /* (For more information, please also see myoiSupportsFeature() function declaration.) */
+
/*************************************************************
* define the property of MYO Arena
***********************************************************/
diff --git a/liboffloadmic/plugin/Makefile.am b/liboffloadmic/plugin/Makefile.am
index 6ec444ccb4c1..9ff8c9a06bec 100644
--- a/liboffloadmic/plugin/Makefile.am
+++ b/liboffloadmic/plugin/Makefile.am
@@ -35,7 +35,6 @@ ACLOCAL_AMFLAGS = -I ../.. -I ../../config
build_dir = $(top_builddir)
source_dir = $(top_srcdir)
coi_inc_dir = $(top_srcdir)/../include/coi
-myo_inc_dir = $(top_srcdir)/../include/myo
include_src_dir = $(top_srcdir)/../../include
libgomp_src_dir = $(top_srcdir)/../../libgomp
libgomp_dir = $(build_dir)/../../libgomp
@@ -53,12 +52,12 @@ target_install_dir = $(accel_search_dir)/lib/gcc/$(accel_target)/$(gcc_version)$
if PLUGIN_HOST
toolexeclib_LTLIBRARIES = libgomp-plugin-intelmic.la
libgomp_plugin_intelmic_la_SOURCES = libgomp-plugin-intelmic.cpp
- libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
+ libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
libgomp_plugin_intelmic_la_LDFLAGS = -L$(liboffload_dir)/.libs -loffloadmic_host -version-info 1:0:0
else # PLUGIN_TARGET
plugin_includedir = $(libsubincludedir)
plugin_include_HEADERS = main_target_image.h
- AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
+ AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
AM_CXXFLAGS = $(CXXFLAGS)
AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
endif
diff --git a/liboffloadmic/plugin/Makefile.in b/liboffloadmic/plugin/Makefile.in
index 458c9b20bc57..7d60476f70f9 100644
--- a/liboffloadmic/plugin/Makefile.in
+++ b/liboffloadmic/plugin/Makefile.in
@@ -305,7 +305,6 @@ ACLOCAL_AMFLAGS = -I ../.. -I ../../config
build_dir = $(top_builddir)
source_dir = $(top_srcdir)
coi_inc_dir = $(top_srcdir)/../include/coi
-myo_inc_dir = $(top_srcdir)/../include/myo
include_src_dir = $(top_srcdir)/../../include
libgomp_src_dir = $(top_srcdir)/../../libgomp
libgomp_dir = $(build_dir)/../../libgomp
@@ -321,11 +320,11 @@ target_build_dir = $(accel_search_dir)/$(accel_target)$(MULTISUBDIR)/liboffloadm
target_install_dir = $(accel_search_dir)/lib/gcc/$(accel_target)/$(gcc_version)$(MULTISUBDIR)
@PLUGIN_HOST_TRUE@toolexeclib_LTLIBRARIES = libgomp-plugin-intelmic.la
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_SOURCES = libgomp-plugin-intelmic.cpp
-@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
+@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(include_src_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include
@PLUGIN_HOST_TRUE@libgomp_plugin_intelmic_la_LDFLAGS = -L$(liboffload_dir)/.libs -loffloadmic_host -version-info 1:0:0
@PLUGIN_HOST_FALSE@plugin_includedir = $(libsubincludedir)
@PLUGIN_HOST_FALSE@plugin_include_HEADERS = main_target_image.h
-@PLUGIN_HOST_FALSE@AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
+@PLUGIN_HOST_FALSE@AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
@PLUGIN_HOST_FALSE@AM_CXXFLAGS = $(CXXFLAGS)
@PLUGIN_HOST_FALSE@AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
diff --git a/liboffloadmic/runtime/cean_util.cpp b/liboffloadmic/runtime/cean_util.cpp
index 3258d7f3ade7..88bfa0d735f7 100644
--- a/liboffloadmic/runtime/cean_util.cpp
+++ b/liboffloadmic/runtime/cean_util.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -34,7 +34,7 @@
// 1. allocate element of CeanReadRanges type
// 2. initialized it for reading consequently contiguous ranges
// described by "ap" argument
-CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
+CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
{
CeanReadRanges * res;
@@ -57,6 +57,8 @@ CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
(ap->rank - rank) * sizeof(CeanReadDim));
if (res == NULL)
LIBOFFLOAD_ERROR(c_malloc);
+
+ res->arr_desc = const_cast<Arr_Desc*>(ap);
res->current_number = 0;
res->range_size = length;
res->last_noncont_ind = rank;
@@ -82,7 +84,7 @@ CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
return res;
}
-// check if ranges described by 1 argument could be transfered into ranges
+// check if ranges described by 1 argument could be transferred into ranges
// described by 2-nd one
bool cean_ranges_match(
CeanReadRanges * read_rng1,
@@ -118,7 +120,7 @@ bool get_next_range(
return true;
}
-bool is_arr_desc_contiguous(const arr_desc *ap)
+bool is_arr_desc_contiguous(const Arr_Desc *ap)
{
int64_t rank = ap->rank - 1;
int64_t length = ap->dim[rank].size;
@@ -146,14 +148,22 @@ int64_t cean_get_transf_size(CeanReadRanges * read_rng)
}
static uint64_t last_left, last_right;
-typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize);
+
+typedef void (*fpp)(
+ const char *spaces,
+ uint64_t low,
+ uint64_t high,
+ int esize,
+ bool print_values
+);
static void generate_one_range(
const char *spaces,
uint64_t lrange,
uint64_t rrange,
fpp fp,
- int esize
+ int esize,
+ bool print_values
)
{
OFFLOAD_TRACE(3,
@@ -168,20 +178,35 @@ static void generate_one_range(
// Extend previous range, don't print
}
else {
- (*fp)(spaces, last_left, last_right, esize);
+ (*fp)(spaces, last_left, last_right, esize, print_values);
last_left = lrange;
}
}
last_right = rrange;
}
+static bool element_is_contiguous(
+    uint64_t rank,
+    const struct Dim_Desc *ddp
+)
+{
+    // Each of the rank levels starting at ddp must be a single element or a
+    // unit-stride section (the old `ddp++` argument re-passed ddp unchanged) ...
+    if (ddp[0].lower != ddp[0].upper && ddp[0].stride != 1) return false;
+    if (rank == 1) return true;
+    // ... and one element of this level must span the whole next-level section.
+    return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
+            element_is_contiguous(rank-1, ddp+1));
+}
+
static void generate_mem_ranges_one_rank(
const char *spaces,
uint64_t base,
uint64_t rank,
- const struct dim_desc *ddp,
+ const struct Dim_Desc *ddp,
fpp fp,
- int esize
+ int esize,
+ bool print_values
)
{
uint64_t lindex = ddp->lindex;
@@ -194,35 +219,40 @@ static void generate_mem_ranges_one_rank(
"generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
"lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
- if (rank == 1) {
+
+ if (element_is_contiguous(rank, ddp)) {
uint64_t lrange, rrange;
- if (stride == 1) {
- lrange = base + (lower-lindex)*size;
- rrange = lrange + (upper-lower+1)*size - 1;
- generate_one_range(spaces, lrange, rrange, fp, esize);
- }
- else {
+ lrange = base + (lower-lindex)*size;
+ rrange = lrange + (upper-lower+1)*size - 1;
+ generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
+ }
+ else {
+ if (rank == 1) {
for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
+ uint64_t lrange, rrange;
lrange = base + i*size;
rrange = lrange + size - 1;
- generate_one_range(spaces, lrange, rrange, fp, esize);
+ generate_one_range(spaces, lrange, rrange,
+ fp, esize, print_values);
}
}
- }
- else {
- for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
- generate_mem_ranges_one_rank(
- spaces, base+i*size, rank-1, ddp+1, fp, esize);
+ else {
+ for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
+ generate_mem_ranges_one_rank(
+ spaces, base+i*size, rank-1, ddp+1,
+ fp, esize, print_values);
+ }
}
}
}
static void generate_mem_ranges(
const char *spaces,
- const arr_desc *adp,
+ const Arr_Desc *adp,
bool deref,
- fpp fp
+ fpp fp,
+ bool print_values
)
{
uint64_t esize;
@@ -241,13 +271,13 @@ static void generate_mem_ranges(
// For c_cean_var the base addr is the address of the data
// For c_cean_var_ptr the base addr is dereferenced to get to the data
spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
- adp->rank, &adp->dim[0], fp, esize);
- (*fp)(spaces, last_left, last_right, esize);
+ adp->rank, &adp->dim[0], fp, esize, print_values);
+ (*fp)(spaces, last_left, last_right, esize, print_values);
}
// returns offset and length of the data to be transferred
void __arr_data_offset_and_length(
- const arr_desc *adp,
+ const Arr_Desc *adp,
int64_t &offset,
int64_t &length
)
@@ -284,11 +314,12 @@ void __arr_data_offset_and_length(
#if OFFLOAD_DEBUG > 0
-void print_range(
+static void print_range(
const char *spaces,
uint64_t low,
uint64_t high,
- int esize
+ int esize,
+ bool print_values
)
{
char buffer[1024];
@@ -297,7 +328,7 @@ void print_range(
OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
spaces, (void*)low, (void*)high, esize);
- if (console_enabled < 4) {
+ if (console_enabled < 4 || !print_values) {
return;
}
OFFLOAD_TRACE(4, "%s values:\n", spaces);
@@ -340,8 +371,9 @@ void print_range(
void __arr_desc_dump(
const char *spaces,
const char *name,
- const arr_desc *adp,
- bool deref
+ const Arr_Desc *adp,
+ bool deref,
+ bool print_values
)
{
OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
@@ -360,7 +392,7 @@ void __arr_desc_dump(
}
// For c_cean_var the base addr is the address of the data
// For c_cean_var_ptr the base addr is dereferenced to get to the data
- generate_mem_ranges(spaces, adp, deref, &print_range);
+ generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
}
}
#endif // OFFLOAD_DEBUG
diff --git a/liboffloadmic/runtime/cean_util.h b/liboffloadmic/runtime/cean_util.h
index 83140479269b..8b7c43767390 100644
--- a/liboffloadmic/runtime/cean_util.h
+++ b/liboffloadmic/runtime/cean_util.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -32,9 +32,10 @@
#define CEAN_UTIL_H_INCLUDED
#include <stdint.h>
+#include "offload_util.h"
// CEAN expression representation
-struct dim_desc {
+struct Dim_Desc {
int64_t size; // Length of data type
int64_t lindex; // Lower index
int64_t lower; // Lower section bound
@@ -42,10 +43,10 @@ struct dim_desc {
int64_t stride; // Stride
};
-struct arr_desc {
+struct Arr_Desc {
int64_t base; // Base address
int64_t rank; // Rank of array
- dim_desc dim[1];
+ Dim_Desc dim[1];
};
struct CeanReadDim {
@@ -55,6 +56,7 @@ struct CeanReadDim {
};
struct CeanReadRanges {
+ Arr_Desc* arr_desc;
void * ptr;
int64_t current_number; // the number of ranges read
int64_t range_max_number; // number of contiguous ranges
@@ -66,23 +68,23 @@ struct CeanReadRanges {
// array descriptor length
#define __arr_desc_length(rank) \
- (sizeof(int64_t) + sizeof(dim_desc) * (rank))
+ (sizeof(int64_t) + sizeof(Dim_Desc) * (rank))
// returns offset and length of the data to be transferred
-void __arr_data_offset_and_length(const arr_desc *adp,
+DLL_LOCAL void __arr_data_offset_and_length(const Arr_Desc *adp,
int64_t &offset,
int64_t &length);
// define if data array described by argument is contiguous one
-bool is_arr_desc_contiguous(const arr_desc *ap);
+DLL_LOCAL bool is_arr_desc_contiguous(const Arr_Desc *ap);
// allocate element of CeanReadRanges type initialized
// to read consequently contiguous ranges described by "ap" argument
-CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap);
+DLL_LOCAL CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap);
-// check if ranges described by 1 argument could be transfered into ranges
+// check if ranges described by 1 argument could be transferred into ranges
// described by 2-nd one
-bool cean_ranges_match(
+DLL_LOCAL bool cean_ranges_match(
CeanReadRanges * read_rng1,
CeanReadRanges * read_rng2
);
@@ -90,27 +92,27 @@ bool cean_ranges_match(
// first argument - returned value by call to init_read_ranges_arr_desc.
// returns true if offset and length of next range is set successfuly.
// returns false if the ranges is over.
-bool get_next_range(
+DLL_LOCAL bool get_next_range(
CeanReadRanges * read_rng,
int64_t *offset
);
-// returns number of transfered bytes
-int64_t cean_get_transf_size(CeanReadRanges * read_rng);
+// returns number of transferred bytes
+DLL_LOCAL int64_t cean_get_transf_size(CeanReadRanges * read_rng);
#if OFFLOAD_DEBUG > 0
// prints array descriptor contents to stderr
-void __arr_desc_dump(
+DLL_LOCAL void __arr_desc_dump(
const char *spaces,
const char *name,
- const arr_desc *adp,
- bool dereference);
+ const Arr_Desc *adp,
+ bool dereference,
+ bool print_values);
+#define ARRAY_DESC_DUMP(spaces, name, adp, dereference, print_values) \
+ if (console_enabled >= 2) \
+ __arr_desc_dump(spaces, name, adp, dereference, print_values);
#else
-#define __arr_desc_dump(
- spaces,
- name,
- adp,
- dereference)
+#define ARRAY_DESC_DUMP(spaces, name, adp, dereference, print_values)
#endif // OFFLOAD_DEBUG
#endif // CEAN_UTIL_H_INCLUDED
diff --git a/liboffloadmic/runtime/coi/coi_client.cpp b/liboffloadmic/runtime/coi/coi_client.cpp
index 0fb2c398855a..ca18e5f95250 100644
--- a/liboffloadmic/runtime/coi/coi_client.cpp
+++ b/liboffloadmic/runtime/coi/coi_client.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -50,6 +50,13 @@ COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*,
const char**, uint8_t, const char*,
uint64_t, const char*, const char*,
uint64_t, COIPROCESS*);
+COIRESULT (*ProcessCreateFromFile)(COIENGINE, const char*,
+ int, const char**, uint8_t,
+ const char**, uint8_t, const char*,
+ uint64_t, const char*,COIPROCESS*);
+COIRESULT (*ProcessSetCacheSize)(COIPROCESS, uint64_t, uint32_t,
+ uint64_t, uint32_t, uint32_t,
+ const COIEVENT*, COIEVENT*);
COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*);
COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**,
COIFUNCTION*);
@@ -57,6 +64,8 @@ COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t,
const char*, const char*,
const char*, uint64_t, uint32_t,
COILIBRARY*);
+COIRESULT (*ProcessUnloadLibrary)(COIPROCESS,
+ COILIBRARY);
COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*,
const char**, const uint64_t*);
@@ -80,6 +89,13 @@ COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t,
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE,
uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferReadMultiD)(COIBUFFER, uint64_t,
+ void *, void *, COI_COPY_TYPE,
+ uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferWriteMultiD)(COIBUFFER, const COIPROCESS,
+ uint64_t, void *, void *,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+
COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
@@ -92,6 +108,20 @@ COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*,
uint64_t (*PerfGetCycleFrequency)(void);
+COIRESULT (*PipelineClearCPUMask) (COI_CPU_MASK);
+
+COIRESULT (*PipelineSetCPUMask) (COIPROCESS, uint32_t,
+ uint8_t, COI_CPU_MASK);
+COIRESULT (*EngineGetInfo)(COIENGINE, uint32_t, COI_ENGINE_INFO*);
+
+COIRESULT (*EventRegisterCallback)(
+ const COIEVENT,
+ void (*)(COIEVENT, const COIRESULT, const void*),
+ const void*,
+ const uint64_t);
+
+COIRESULT (*ProcessConfigureDMA)(const uint64_t, const int);
+
bool init(void)
{
#ifndef TARGET_WINNT
@@ -140,6 +170,32 @@ bool init(void)
return false;
}
+ ProcessSetCacheSize =
+ (COIRESULT (*)(COIPROCESS, uint64_t, uint32_t,
+ uint64_t, uint32_t, uint32_t,
+ const COIEVENT*, COIEVENT*))
+ DL_sym(lib_handle, "COIProcessSetCacheSize", COI_VERSION1);
+ if (ProcessSetCacheSize == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessSetCacheSize");
+#if 0 // for now disable as ProcessSetCacheSize is not available on < MPSS 3.4
+ fini();
+ return false;
+#endif
+ }
+
+ ProcessCreateFromFile =
+ (COIRESULT (*)(COIENGINE, const char*, int, const char**, uint8_t,
+ const char**, uint8_t, const char*, uint64_t,
+ const char*, COIPROCESS*))
+ DL_sym(lib_handle, "COIProcessCreateFromFile", COI_VERSION1);
+ if (ProcessCreateFromFile == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessCreateFromFile");
+ fini();
+ return false;
+ }
+
ProcessDestroy =
(COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*,
uint32_t*))
@@ -173,6 +229,17 @@ bool init(void)
return false;
}
+ ProcessUnloadLibrary =
+ (COIRESULT (*)(COIPROCESS,
+ COILIBRARY))
+ DL_sym(lib_handle, "COIProcessUnloadLibrary", COI_VERSION1);
+ if (ProcessUnloadLibrary == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIProcessUnloadLibrary");
+ fini();
+ return false;
+ }
+
ProcessRegisterLibraries =
(COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**,
const uint64_t*))
@@ -295,6 +362,22 @@ bool init(void)
return false;
}
+ BufferReadMultiD =
+ (COIRESULT (*)(COIBUFFER, uint64_t,
+ void *, void *, COI_COPY_TYPE,
+ uint32_t, const COIEVENT*, COIEVENT*))
+ DL_sym(lib_handle, "COIBufferReadMultiD", COI_VERSION1);
+ // We accept that coi library has no COIBufferReadMultiD routine.
+ // So there is no check for zero value
+
+ BufferWriteMultiD =
+ (COIRESULT (*)(COIBUFFER, const COIPROCESS,
+ uint64_t, void *, void *,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*))
+ DL_sym(lib_handle, "COIBufferWriteMultiD", COI_VERSION1);
+ // We accept that coi library has no COIBufferWriteMultiD routine.
+ // So there is no check for zero value
+
BufferCopy =
(COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
COI_COPY_TYPE, uint32_t, const COIEVENT*,
@@ -350,6 +433,47 @@ bool init(void)
return false;
}
+ PipelineClearCPUMask =
+ (COIRESULT (*)(COI_CPU_MASK))
+ DL_sym(lib_handle, "COIPipelineClearCPUMask", COI_VERSION1);
+ if (PipelineClearCPUMask == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPipelineClearCPUMask");
+ fini();
+ return false;
+ }
+
+ PipelineSetCPUMask =
+ (COIRESULT (*)(COIPROCESS, uint32_t,uint8_t, COI_CPU_MASK))
+ DL_sym(lib_handle, "COIPipelineSetCPUMask", COI_VERSION1);
+ if (PipelineSetCPUMask == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+ "COIPipelineSetCPUMask");
+ fini();
+ return false;
+ }
+
+    EngineGetInfo =
+        (COIRESULT (*)(COIENGINE, uint32_t, COI_ENGINE_INFO*))
+            DL_sym(lib_handle, "COIEngineGetInfo", COI_VERSION1);
+    if (EngineGetInfo == 0) {  // test the pointer resolved above, not the identifier COIEngineGetInfo
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIEngineGetInfo");
+        fini();
+        return false;
+    }
+
+ EventRegisterCallback =
+ (COIRESULT (*)(COIEVENT,
+ void (*)(COIEVENT, const COIRESULT, const void*),
+ const void*,
+ const uint64_t))
+ DL_sym(lib_handle, "COIEventRegisterCallback", COI_VERSION1);
+
+ ProcessConfigureDMA =
+ (COIRESULT (*)(const uint64_t, const int))
+ DL_sym(lib_handle, "COIProcessConfigureDMA", COI_VERSION1);
+
is_available = true;
return true;
diff --git a/liboffloadmic/runtime/coi/coi_client.h b/liboffloadmic/runtime/coi/coi_client.h
index 54b83a9d9650..9c91077aac6f 100644
--- a/liboffloadmic/runtime/coi/coi_client.h
+++ b/liboffloadmic/runtime/coi/coi_client.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,7 +28,7 @@
*/
-// The interface betwen offload library and the COI API on the host
+// The interface between offload library and the COI API on the host
#ifndef COI_CLIENT_H_INCLUDED
#define COI_CLIENT_H_INCLUDED
@@ -54,16 +54,16 @@
// COI library interface
namespace COI {
-extern bool init(void);
-extern void fini(void);
+DLL_LOCAL extern bool init(void);
+DLL_LOCAL extern void fini(void);
-extern bool is_available;
+DLL_LOCAL extern bool is_available;
// pointers to functions from COI library
-extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
-extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+DLL_LOCAL extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+DLL_LOCAL extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
-extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
+DLL_LOCAL extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
const void*, uint64_t, int,
const char**, uint8_t,
const char**, uint8_t,
@@ -71,12 +71,23 @@ extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
const char*,
const char*, uint64_t,
COIPROCESS*);
-extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
+DLL_LOCAL extern COIRESULT (*ProcessCreateFromFile)(COIENGINE, const char*, int,
+ const char**, uint8_t,
+ const char**,
+ uint8_t,
+ const char*,
+ uint64_t,
+ const char*,
+ COIPROCESS*);
+DLL_LOCAL extern COIRESULT (*ProcessSetCacheSize)(COIPROCESS, uint64_t, uint32_t,
+ uint64_t, uint32_t, uint32_t,
+ const COIEVENT*, COIEVENT*);
+DLL_LOCAL extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
int8_t*, uint32_t*);
-extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
+DLL_LOCAL extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
const char**,
COIFUNCTION*);
-extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
+DLL_LOCAL extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
const void*,
uint64_t,
const char*,
@@ -85,54 +96,80 @@ extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
uint64_t,
uint32_t,
COILIBRARY*);
-extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
+
+DLL_LOCAL extern COIRESULT (*ProcessUnloadLibrary)(COIPROCESS,
+ COILIBRARY);
+
+DLL_LOCAL extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
const void**,
const uint64_t*,
const char**,
const uint64_t*);
-extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
+DLL_LOCAL extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
COIPIPELINE*);
-extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
-extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
+DLL_LOCAL extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
+DLL_LOCAL extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
uint32_t, const COIBUFFER*,
const COI_ACCESS_FLAGS*,
uint32_t, const COIEVENT*,
const void*, uint16_t, void*,
uint16_t, COIEVENT*);
-extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+DLL_LOCAL extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
const void*, uint32_t,
const COIPROCESS*, COIBUFFER*);
-extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
+DLL_LOCAL extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
uint32_t, void*,
uint32_t, const COIPROCESS*,
COIBUFFER*);
-extern COIRESULT (*BufferDestroy)(COIBUFFER);
-extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
+DLL_LOCAL extern COIRESULT (*BufferDestroy)(COIBUFFER);
+DLL_LOCAL extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
COI_MAP_TYPE, uint32_t, const COIEVENT*,
COIEVENT*, COIMAPINSTANCE*, void**);
-extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
+DLL_LOCAL extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
const COIEVENT*, COIEVENT*);
-extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
+DLL_LOCAL extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
uint64_t, COI_COPY_TYPE, uint32_t,
const COIEVENT*, COIEVENT*);
-extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
+DLL_LOCAL extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
COI_COPY_TYPE, uint32_t,
const COIEVENT*, COIEVENT*);
-extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
+DLL_LOCAL extern COIRESULT (*BufferReadMultiD)(COIBUFFER, uint64_t,
+ void *, void *, COI_COPY_TYPE,
+ uint32_t, const COIEVENT*, COIEVENT*);
+DLL_LOCAL extern COIRESULT (*BufferWriteMultiD)(COIBUFFER, const COIPROCESS,
+ uint64_t, void *, void *,
+ COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+
+DLL_LOCAL extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
uint64_t, COI_COPY_TYPE, uint32_t,
const COIEVENT*, COIEVENT*);
-extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
-extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+DLL_LOCAL extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+DLL_LOCAL extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
COI_BUFFER_MOVE_FLAG, uint32_t,
const COIEVENT*, COIEVENT*);
-extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
+DLL_LOCAL extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
uint8_t, uint32_t*, uint32_t*);
-extern uint64_t (*PerfGetCycleFrequency)(void);
+DLL_LOCAL extern uint64_t (*PerfGetCycleFrequency)(void);
+
+DLL_LOCAL extern COIRESULT (*ProcessConfigureDMA)(const uint64_t, const int);
+
+extern COIRESULT (*PipelineClearCPUMask)(COI_CPU_MASK);
+
+extern COIRESULT (*PipelineSetCPUMask)(COIPROCESS, uint32_t,
+ uint8_t, COI_CPU_MASK);
+extern COIRESULT (*EngineGetInfo)(COIENGINE, uint32_t, COI_ENGINE_INFO*);
+extern COIRESULT (*EventRegisterCallback)(
+ const COIEVENT,
+ void (*)(COIEVENT, const COIRESULT, const void*),
+ const void*,
+ const uint64_t);
+
+const int DMA_MODE_READ_WRITE = 1;
} // namespace COI
#endif // COI_CLIENT_H_INCLUDED
diff --git a/liboffloadmic/runtime/coi/coi_server.cpp b/liboffloadmic/runtime/coi/coi_server.cpp
index 7eebf5a306e6..88dde9157dae 100644
--- a/liboffloadmic/runtime/coi/coi_server.cpp
+++ b/liboffloadmic/runtime/coi/coi_server.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -38,6 +38,22 @@
#include "../offload_myo_target.h" // for __offload_myoLibInit/Fini
#endif // MYO_SUPPORT
+#if !defined(CPU_COUNT)
+// Fallback when CPU_COUNT is not defined: add up the bits set in each mask word of cpu_set
+static
+int my_cpu_count(cpu_set_t const *cpu_set)
+{
+    int res = 0;
+    for (size_t i = 0; i < sizeof(cpu_set_t) / sizeof(__cpu_mask); ++i) {
+        res += __builtin_popcountl(cpu_set->__bits[i]);
+    }
+    return res;
+}
+// Map CPU_COUNT to our function
+#define CPU_COUNT(x) my_cpu_count(x)
+
+#endif
+
COINATIVELIBEXPORT
void server_compute(
uint32_t buffer_count,
@@ -118,6 +134,20 @@ void server_var_table_copy(
__offload_vars.table_copy(buffers[0], *static_cast<int64_t*>(misc_data));
}
+COINATIVELIBEXPORT
+void server_set_stream_affinity(
+ uint32_t buffer_count,
+ void** buffers,
+ uint64_t* buffers_len,
+ void* misc_data,
+ uint16_t misc_data_len,
+ void* return_data,
+ uint16_t return_data_len
+)
+{
+ /* Deliberately empty, all arguments are ignored: kmp affinity is not supported by GCC. */
+}
+
#ifdef MYO_SUPPORT
// temporary workaround for blocking behavior of myoiLibInit/Fini calls
COINATIVELIBEXPORT
diff --git a/liboffloadmic/runtime/coi/coi_server.h b/liboffloadmic/runtime/coi/coi_server.h
index 14376108e9dc..2ea67b795625 100644
--- a/liboffloadmic/runtime/coi/coi_server.h
+++ b/liboffloadmic/runtime/coi/coi_server.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,7 +28,7 @@
*/
-//The interface betwen offload library and the COI API on the target.
+// The interface between offload library and the COI API on the target
#ifndef COI_SERVER_H_INCLUDED
#define COI_SERVER_H_INCLUDED
diff --git a/liboffloadmic/runtime/compiler_if_host.cpp b/liboffloadmic/runtime/compiler_if_host.cpp
index c4e2a15633f7..7bac0e54de81 100644
--- a/liboffloadmic/runtime/compiler_if_host.cpp
+++ b/liboffloadmic/runtime/compiler_if_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -35,7 +35,7 @@
#include <alloca.h>
#endif // TARGET_WINNT
-// Global counter on host.
+// Global counter on host.
// This variable is used if P2OPT_offload_do_data_persistence == 2.
// The variable used to identify offload constructs contained in one procedure.
// Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with
@@ -72,7 +72,7 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
- // initalize all devices is init_type is on_offload_all
+ // initialize all devices if init_type is on_offload_all
if (retval && __offload_init_type == c_init_on_offload_all) {
for (int i = 0; i < mic_engines_total; i++) {
mic_engines[i].init();
@@ -241,7 +241,128 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
return ofld;
}
-int offload_offload_wrap(
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2( // Acquire a target for an offload; returns a new OffloadDescriptor, or NULL when none is created.
+ TARGET_TYPE target_type, // TARGET_HOST and TARGET_MIC are the two values handled explicitly below
+ int target_number, // for TARGET_MIC must be >= -1, otherwise an error is reported and the process exits
+ int is_optional, // non-zero: ORSL::try_reserve is used; zero: ORSL::reserve is used
+ _Offload_status* status, // may be 0; when non-zero it is reset to OFFLOAD_UNAVAILABLE first
+ const char* file, // forwarded to OFFLOAD_TIMER_INIT
+ uint64_t line, // forwarded to OFFLOAD_TIMER_INIT
+ const void** stream // address of the stream value; read only on the TARGET_MIC path
+)
+{
+ bool retval;
+ OFFLOAD ofld;
+
+ // initialize status
+ if (status != 0) {
+ status->result = OFFLOAD_UNAVAILABLE;
+ status->device_number = -1;
+ status->data_sent = 0;
+ status->data_received = 0;
+ }
+
+ // make sure library is initialized
+ retval = __offload_init_library();
+ // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+ OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+ // initialize all devices if init_type is on_offload_all
+ if (retval && __offload_init_type == c_init_on_offload_all) {
+ for (int i = 0; i < mic_engines_total; i++) {
+ mic_engines[i].init();
+ }
+ }
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+ if (target_type == TARGET_HOST) {
+ // Host always available
+ retval = true;
+ }
+ else if (target_type == TARGET_MIC) {
+ _Offload_stream handle = *(reinterpret_cast<_Offload_stream*>(stream)); // NOTE(review): stream is dereferenced without a NULL check here, while OFFLOAD_OFFLOAD3 tests it first - confirm callers never pass NULL
+ Stream * stream = handle ? Stream::find_stream(handle, false) : NULL; // this local shadows the 'stream' parameter for the rest of the branch
+ if (target_number >= -1) {
+ if (retval) {
+ // device number is defined by stream
+ if (stream) {
+ target_number = stream->get_device();
+ target_number = target_number % mic_engines_total; // NOTE(review): assumes mic_engines_total != 0 - confirm
+ }
+
+ // reserve device in ORSL
+ if (target_number != -1) {
+ if (is_optional) {
+ if (!ORSL::try_reserve(target_number)) {
+ target_number = -1;
+ }
+ }
+ else {
+ if (!ORSL::reserve(target_number)) {
+ target_number = -1;
+ }
+ }
+ }
+
+ // initialize device
+ if (target_number >= 0 &&
+ __offload_init_type == c_init_on_offload) {
+ OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+ mic_engines[target_number].init();
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+ }
+ }
+ else {
+ // fallback to CPU
+ target_number = -1;
+ }
+ if (!(target_number == -1 && handle == 0)) { // skipped when target_number is -1 and no stream handle was given: retval is left unchanged in that case
+ if (target_number < 0 || !retval) {
+ if (!is_optional && status == 0) { // mandatory offload and no status variable to report through: print error and terminate
+ LIBOFFLOAD_ERROR(c_device_is_not_available);
+ exit(1);
+ }
+
+ retval = false;
+ }
+ }
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_device_number);
+ exit(1);
+ }
+ }
+
+ if (retval) {
+ ofld = new OffloadDescriptor(target_number, status,
+ !is_optional, false, timer_data);
+ OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+ Offload_Report_Prolog(timer_data);
+ OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+ "Starting offload: target_type = %d, "
+ "number = %d, is_optional = %d\n",
+ target_type, target_number, is_optional);
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire); // c_offload_host_total_offload is not stopped on this path
+ }
+ else {
+ ofld = NULL;
+
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+ OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
+ offload_report_free_data(timer_data); // no descriptor was created, so timer_data is handed to offload_report_free_data here
+ }
+
+ return ofld;
+}
+
+static int offload_offload_wrap(
OFFLOAD ofld,
const char *name,
int is_empty,
@@ -252,12 +373,15 @@ int offload_offload_wrap(
const void **waits,
const void **signal,
int entry_id,
- const void *stack_addr
+ const void *stack_addr,
+ OffloadFlags offload_flags
)
{
bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
- waits, num_waits, signal, entry_id, stack_addr);
- if (!ret || signal == 0) {
+ waits, num_waits, signal, entry_id,
+ stack_addr, offload_flags);
+ if (!ret || (signal == 0 && ofld->get_stream() == 0 &&
+ !offload_flags.bits.omp_async)) {
delete ofld;
}
return ret;
@@ -278,7 +402,7 @@ extern "C" int OFFLOAD_OFFLOAD1(
return offload_offload_wrap(ofld, name, is_empty,
num_vars, vars, vars2,
num_waits, waits,
- signal, NULL, NULL);
+ signal, 0, NULL, {0});
}
extern "C" int OFFLOAD_OFFLOAD2(
@@ -298,7 +422,35 @@ extern "C" int OFFLOAD_OFFLOAD2(
return offload_offload_wrap(ofld, name, is_empty,
num_vars, vars, vars2,
num_waits, waits,
- signal, entry_id, stack_addr);
+ signal, entry_id, stack_addr, {0});
+}
+
+extern "C" int OFFLOAD_OFFLOAD3( // Sets the stream on the descriptor when one is supplied, then forwards to offload_offload_wrap.
+ OFFLOAD ofld,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void** signal,
+ int entry_id,
+ const void *stack_addr,
+ OffloadFlags offload_flags,
+ const void** stream // may be 0; otherwise the address of the stream value
+)
+{
+ // 1. if the source is compiled with -traceback then stream is 0
+ // 2. if offload has a stream clause then stream is address of stream value
+ if (stream) { // a null pointer means set_stream is not called on the descriptor
+ ofld->set_stream(*(reinterpret_cast<_Offload_stream *>(stream)));
+ }
+
+ return offload_offload_wrap(ofld, name, is_empty, // the wrapper's result is returned unchanged; the wrapper may delete ofld
+ num_vars, vars, vars2,
+ num_waits, waits,
+ signal, entry_id, stack_addr, offload_flags);
}
extern "C" int OFFLOAD_OFFLOAD(
diff --git a/liboffloadmic/runtime/compiler_if_host.h b/liboffloadmic/runtime/compiler_if_host.h
index 1a7135088f62..b8f36db7de51 100644
--- a/liboffloadmic/runtime/compiler_if_host.h
+++ b/liboffloadmic/runtime/compiler_if_host.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -39,9 +39,11 @@
#define OFFLOAD_TARGET_ACQUIRE OFFLOAD_PREFIX(target_acquire)
#define OFFLOAD_TARGET_ACQUIRE1 OFFLOAD_PREFIX(target_acquire1)
+#define OFFLOAD_TARGET_ACQUIRE2 OFFLOAD_PREFIX(target_acquire2)
#define OFFLOAD_OFFLOAD OFFLOAD_PREFIX(offload)
#define OFFLOAD_OFFLOAD1 OFFLOAD_PREFIX(offload1)
#define OFFLOAD_OFFLOAD2 OFFLOAD_PREFIX(offload2)
+#define OFFLOAD_OFFLOAD3 OFFLOAD_PREFIX(offload3)
#define OFFLOAD_CALL_COUNT OFFLOAD_PREFIX(offload_call_count)
@@ -75,6 +77,26 @@ extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
uint64_t line
);
+/*! \fn OFFLOAD_TARGET_ACQUIRE2
+ \brief Attempt to acquire the target, taking the device from a stream if one is given.
+ \param target_type The type of target.
+ \param target_number The device number.
+ \param is_optional Whether CPU fall-back is allowed.
+ \param status Address of variable to hold offload status, or 0.
+ \param file Filename in which this offload occurred.
+ \param line Line number in the file where this offload occurred.
+ \param stream Pointer to stream value; dereferenced when target_type is TARGET_MIC.
+ \return Offload descriptor, or NULL if the target was not acquired. */
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE2(
+ TARGET_TYPE target_type,
+ int target_number,
+ int is_optional,
+ _Offload_status* status,
+ const char* file,
+ uint64_t line,
+ const void** stream
+);
+
/*! \fn OFFLOAD_OFFLOAD1
\brief Run function on target using interface for old data persistence.
\param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
@@ -127,6 +149,40 @@ extern "C" int OFFLOAD_OFFLOAD2(
const void *stack_addr
);
+
+/*! \fn OFFLOAD_OFFLOAD3
+ \brief Run function on target, API introduced in 15.0 Update 1
+ when the targetptr and preallocated features were introduced.
+ \param ofld Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+ \param name Name of offload entry point.
+ \param is_empty If no code to execute (e.g. offload_transfer)
+ \param num_vars Number of variable descriptors.
+ \param vars Pointer to VarDesc array.
+ \param vars2 Pointer to VarDesc2 array.
+ \param num_waits Number of "wait" values.
+ \param waits Pointer to array of wait values.
+ \param signal Pointer to signal value or NULL.
+ \param entry_id A signature for the function doing the offload.
+ \param stack_addr The stack frame address of the function doing offload.
+ \param offload_flags Flags to indicate Fortran traceback, OpenMP async.
+ \param stream Pointer to stream value or NULL.
+*/
+extern "C" int OFFLOAD_OFFLOAD3(
+ OFFLOAD ofld,
+ const char *name,
+ int is_empty,
+ int num_vars,
+ VarDesc *vars,
+ VarDesc2 *vars2,
+ int num_waits,
+ const void** waits,
+ const void** signal,
+ int entry_id,
+ const void *stack_addr,
+ OffloadFlags offload_flags,
+ const void** stream
+);
+
// Run function on target (obsolete).
// @param o OFFLOAD object
// @param name function name
diff --git a/liboffloadmic/runtime/compiler_if_target.cpp b/liboffloadmic/runtime/compiler_if_target.cpp
index 839ef14ed63b..bc51d242ef07 100644
--- a/liboffloadmic/runtime/compiler_if_target.cpp
+++ b/liboffloadmic/runtime/compiler_if_target.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/compiler_if_target.h b/liboffloadmic/runtime/compiler_if_target.h
index c4de1260d346..9554238916c7 100644
--- a/liboffloadmic/runtime/compiler_if_target.h
+++ b/liboffloadmic/runtime/compiler_if_target.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/dv_util.cpp b/liboffloadmic/runtime/dv_util.cpp
index 63f50592e009..e31714b8d410 100644
--- a/liboffloadmic/runtime/dv_util.cpp
+++ b/liboffloadmic/runtime/dv_util.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -102,8 +102,8 @@ CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
}
res = (CeanReadRanges *)malloc(
sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
- if (res == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ if (res == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
res -> last_noncont_ind = rank - i - 1;
count = 1;
for (; i < rank; i++) {
diff --git a/liboffloadmic/runtime/dv_util.h b/liboffloadmic/runtime/dv_util.h
index d62cecc6aa13..9095c32c5109 100644
--- a/liboffloadmic/runtime/dv_util.h
+++ b/liboffloadmic/runtime/dv_util.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
#define DV_UTIL_H_INCLUDED
#include <stdint.h>
+#include "offload_util.h"
// Dope vector declarations
#define ArrDescMaxArrayRank 31
@@ -64,18 +65,18 @@ typedef struct ArrDesc {
typedef ArrDesc* pArrDesc;
-bool __dv_is_contiguous(const ArrDesc *dvp);
+DLL_LOCAL bool __dv_is_contiguous(const ArrDesc *dvp);
-bool __dv_is_allocated(const ArrDesc *dvp);
+DLL_LOCAL bool __dv_is_allocated(const ArrDesc *dvp);
-uint64_t __dv_data_length(const ArrDesc *dvp);
+DLL_LOCAL uint64_t __dv_data_length(const ArrDesc *dvp);
-uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
+DLL_LOCAL uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
-CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
+DLL_LOCAL CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
#if OFFLOAD_DEBUG > 0
-void __dv_desc_dump(const char *name, const ArrDesc *dvp);
+DLL_LOCAL void __dv_desc_dump(const char *name, const ArrDesc *dvp);
#else // OFFLOAD_DEBUG
#define __dv_desc_dump(name, dvp)
#endif // OFFLOAD_DEBUG
diff --git a/liboffloadmic/runtime/emulator/coi_common.h b/liboffloadmic/runtime/emulator/coi_common.h
index 482c88854378..7eae324ee740 100644
--- a/liboffloadmic/runtime/emulator/coi_common.h
+++ b/liboffloadmic/runtime/emulator/coi_common.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -62,8 +62,8 @@
/* Environment variable for target executable run command. */
#define OFFLOAD_EMUL_RUN_ENV "OFFLOAD_EMUL_RUN"
-/* Environment variable for number ok KNC devices. */
-#define OFFLOAD_EMUL_KNC_NUM_ENV "OFFLOAD_EMUL_KNC_NUM"
+/* Environment variable for number of emulated devices. */
+#define OFFLOAD_EMUL_NUM_ENV "OFFLOAD_EMUL_NUM"
/* Path to engine directory. */
@@ -133,6 +133,7 @@ typedef enum
CMD_BUFFER_UNMAP,
CMD_GET_FUNCTION_HANDLE,
CMD_OPEN_LIBRARY,
+ CMD_CLOSE_LIBRARY,
CMD_RUN_FUNCTION,
CMD_SHUTDOWN
} cmd_t;
diff --git a/liboffloadmic/runtime/emulator/coi_device.cpp b/liboffloadmic/runtime/emulator/coi_device.cpp
index 1a89a3f55df8..8773a7910ce6 100644
--- a/liboffloadmic/runtime/emulator/coi_device.cpp
+++ b/liboffloadmic/runtime/emulator/coi_device.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -109,8 +109,8 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
strlen (PIPE_HOST_PATH) + strlen (mic_dir) + 1);
MALLOC (char *, pipe_target_path,
strlen (PIPE_TARGET_PATH) + strlen (mic_dir) + 1);
- sprintf (pipe_host_path, "%s"PIPE_HOST_PATH, mic_dir);
- sprintf (pipe_target_path, "%s"PIPE_TARGET_PATH, mic_dir);
+ sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, mic_dir);
+ sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, mic_dir);
pipe_host = open (pipe_host_path, O_CLOEXEC | O_WRONLY);
if (pipe_host < 0)
COIERROR ("Cannot open target-to-host pipe.");
@@ -237,6 +237,7 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
{
char *lib_path;
size_t len;
+ void *handle;
/* Receive data from host. */
READ (pipe_target, &len, sizeof (size_t));
@@ -244,14 +245,28 @@ SYMBOL_VERSION (COIProcessWaitForShutdown, 1) ()
READ (pipe_target, lib_path, len);
/* Open library. */
- if (dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL) == 0)
+ handle = dlopen (lib_path, RTLD_LAZY | RTLD_GLOBAL);
+ if (handle == NULL)
COIERROR ("Cannot load %s: %s", lib_path, dlerror ());
+ /* Send data to host. */
+ WRITE (pipe_host, &handle, sizeof (void *));
+
/* Clean up. */
free (lib_path);
break;
}
+ case CMD_CLOSE_LIBRARY:
+ {
+ /* Receive data from host. */
+ void *handle;
+ READ (pipe_target, &handle, sizeof (void *));
+
+ dlclose (handle);
+
+ break;
+ }
case CMD_RUN_FUNCTION:
{
uint16_t misc_data_len, return_data_len;
diff --git a/liboffloadmic/runtime/emulator/coi_device.h b/liboffloadmic/runtime/emulator/coi_device.h
index 779fdae69e73..616c91849ace 100644
--- a/liboffloadmic/runtime/emulator/coi_device.h
+++ b/liboffloadmic/runtime/emulator/coi_device.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/emulator/coi_host.cpp b/liboffloadmic/runtime/emulator/coi_host.cpp
index 3425920a4a12..cdc04c208e43 100644
--- a/liboffloadmic/runtime/emulator/coi_host.cpp
+++ b/liboffloadmic/runtime/emulator/coi_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -40,8 +40,8 @@ extern char **environ;
char **tmp_dirs;
unsigned tmp_dirs_num = 0;
-/* Number of KNC engines. */
-long knc_engines_num;
+/* Number of emulated MIC engines. */
+long num_engines;
/* Mutex to sync parallel execution. */
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
@@ -116,8 +116,7 @@ __attribute__((constructor))
static void
init ()
{
- if (read_long_env (OFFLOAD_EMUL_KNC_NUM_ENV, &knc_engines_num, 1)
- == COI_ERROR)
+ if (read_long_env (OFFLOAD_EMUL_NUM_ENV, &num_engines, 1) == COI_ERROR)
exit (0);
}
@@ -665,10 +664,10 @@ SYMBOL_VERSION (COIEngineGetCount, 1) (COI_ISA_TYPE isa,
COITRACE ("COIEngineGetCount");
/* Features of liboffload. */
- assert (isa == COI_ISA_KNC);
+ assert (isa == COI_ISA_MIC);
/* Prepare output arguments. */
- *count = knc_engines_num;
+ *count = num_engines;
return COI_SUCCESS;
}
@@ -684,10 +683,10 @@ SYMBOL_VERSION (COIEngineGetHandle, 1) (COI_ISA_TYPE isa,
Engine *engine;
/* Features of liboffload. */
- assert (isa == COI_ISA_KNC);
+ assert (isa == COI_ISA_MIC);
/* Check engine index. */
- if (index >= knc_engines_num)
+ if (index >= num_engines)
COIERROR ("Wrong engine index.");
/* Create engine handle. */
@@ -889,7 +888,7 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
/* Create directory for pipes to prevent names collision. */
MALLOC (char *, pipes_path, strlen (PIPES_PATH) + strlen (eng->dir) + 1);
- sprintf (pipes_path, "%s"PIPES_PATH, eng->dir);
+ sprintf (pipes_path, "%s" PIPES_PATH, eng->dir);
if (mkdir (pipes_path, S_IRWXU) < 0)
COIERROR ("Cannot create folder %s.", pipes_path);
@@ -900,8 +899,8 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
strlen (PIPE_TARGET_PATH) + strlen (eng->dir) + 1);
if (pipe_target_path == NULL)
COIERROR ("Cannot allocate memory.");
- sprintf (pipe_host_path, "%s"PIPE_HOST_PATH, eng->dir);
- sprintf (pipe_target_path, "%s"PIPE_TARGET_PATH, eng->dir);
+ sprintf (pipe_host_path, "%s" PIPE_HOST_PATH, eng->dir);
+ sprintf (pipe_target_path, "%s" PIPE_TARGET_PATH, eng->dir);
if (mkfifo (pipe_host_path, S_IRUSR | S_IWUSR) < 0)
COIERROR ("Cannot create pipe %s.", pipe_host_path);
if (mkfifo (pipe_target_path, S_IRUSR | S_IWUSR) < 0)
@@ -1019,6 +1018,27 @@ SYMBOL_VERSION (COIProcessCreateFromMemory, 1) (COIENGINE engine,
COIRESULT /* Emulator stub: not implemented; none of the arguments are used. */
+SYMBOL_VERSION (COIProcessCreateFromFile, 1) (COIENGINE in_Engine,
+ const char *in_pBinaryName,
+ int in_Argc,
+ const char **in_ppArgv,
+ uint8_t in_DupEnv,
+ const char **in_ppAdditionalEnv,
+ uint8_t in_ProxyActive,
+ const char *in_Reserved,
+ uint64_t in_BufferSpace,
+ const char *in_LibrarySearchPath,
+ COIPROCESS *out_pProcess)
+{
+ COITRACE ("COIProcessCreateFromFile");
+
+ /* liboffloadmic with GCC compiled binaries should never go here. */
+ assert (false);
+ return COI_ERROR; /* Reached only when assert is compiled out (NDEBUG). */
+}
+
+
+COIRESULT
SYMBOL_VERSION (COIProcessDestroy, 1) (COIPROCESS process,
int32_t wait_timeout, // Ignored
uint8_t force,
@@ -1129,38 +1149,39 @@ SYMBOL_VERSION (COIProcessGetFunctionHandles, 1) (COIPROCESS process,
COIRESULT
-SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
- const void *lib_buffer,
- uint64_t lib_buffer_len,
- const char *lib_name,
- const char *lib_search_path,
- const char *file_of_origin, // Ignored
- uint64_t file_from_origin_offset, // Ignored
- uint32_t flags, // Ignored
- COILIBRARY *library) // Ignored
+SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS in_Process,
+ const void *in_pLibraryBuffer,
+ uint64_t in_LibraryBufferLength,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath, // Ignored
+ const char *in_FileOfOrigin, // Ignored
+ uint64_t in_FileOfOriginOffset, // Ignored
+ uint32_t in_Flags, // Ignored
+ COILIBRARY *out_pLibrary)
{
COITRACE ("COIProcessLoadLibraryFromMemory");
+ const cmd_t cmd = CMD_OPEN_LIBRARY;
char *lib_path;
- cmd_t cmd = CMD_OPEN_LIBRARY;
int fd;
FILE *file;
size_t len;
/* Convert input arguments. */
- Process *proc = (Process *) process;
+ Process *proc = (Process *) in_Process;
/* Create target library file. */
MALLOC (char *, lib_path,
- strlen (proc->engine->dir) + strlen (lib_name) + 2);
- sprintf (lib_path, "%s/%s", proc->engine->dir, lib_name);
+ strlen (proc->engine->dir) + strlen (in_pLibraryName) + 2);
+ sprintf (lib_path, "%s/%s", proc->engine->dir, in_pLibraryName);
fd = open (lib_path, O_CLOEXEC | O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
if (fd < 0)
COIERROR ("Cannot create file %s.", lib_path);
file = fdopen (fd, "wb");
if (file == NULL)
COIERROR ("Cannot associate stream with file descriptor.");
- if (fwrite (lib_buffer, 1, lib_buffer_len, file) != lib_buffer_len)
+ if (fwrite (in_pLibraryBuffer, 1, in_LibraryBufferLength, file)
+ != in_LibraryBufferLength)
COIERROR ("Cannot write in file %s.", lib_path);
if (fclose (file) != 0)
COIERROR ("Cannot close file %s.", lib_path);
@@ -1176,6 +1197,10 @@ SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
WRITE (proc->pipeline->pipe_target, &len, sizeof (size_t));
WRITE (proc->pipeline->pipe_target, lib_path, len);
+ /* Receive data from target. */
+ void *handle;
+ READ (proc->pipeline->pipe_host, &handle, sizeof (void *));
+
/* Finish critical section. */
if (pthread_mutex_unlock (&mutex) != 0)
COIERROR ("Cannot unlock mutex.");
@@ -1183,6 +1208,7 @@ SYMBOL_VERSION (COIProcessLoadLibraryFromMemory, 2) (COIPROCESS process,
/* Clean up. */
free (lib_path);
+ *out_pLibrary = (COILIBRARY) handle;
return COI_SUCCESS;
}
@@ -1202,6 +1228,33 @@ SYMBOL_VERSION (COIProcessRegisterLibraries, 1) (uint32_t libraries_num,
}
+COIRESULT /* Sends CMD_CLOSE_LIBRARY followed by the library handle to the target process. */
+SYMBOL_VERSION (COIProcessUnloadLibrary, 1) (COIPROCESS in_Process,
+ COILIBRARY in_Library)
+{
+ COITRACE ("COIProcessUnloadLibrary");
+
+ const cmd_t cmd = CMD_CLOSE_LIBRARY;
+
+ /* Convert input arguments. */
+ Process *proc = (Process *) in_Process;
+
+ /* Start critical section. */
+ if (pthread_mutex_lock (&mutex) != 0)
+ COIERROR ("Cannot lock mutex.");
+
+ /* Make target close library. */
+ WRITE (proc->pipeline->pipe_target, &cmd, sizeof (cmd_t));
+ WRITE (proc->pipeline->pipe_target, &in_Library, sizeof (void *)); /* The raw handle value is sent; the target's CMD_CLOSE_LIBRARY case passes it to dlclose. */
+
+ /* Finish critical section. */
+ if (pthread_mutex_unlock (&mutex) != 0)
+ COIERROR ("Cannot unlock mutex.");
+
+ return COI_SUCCESS; /* No reply is read back from the target before returning. */
+}
+
+
uint64_t
SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
{
@@ -1210,5 +1263,51 @@ SYMBOL_VERSION (COIPerfGetCycleFrequency, 1) ()
return (uint64_t) CYCLE_FREQUENCY;
}
+
+COIRESULT /* Emulator no-op: in_Mask is neither read nor written. */
+SYMBOL_VERSION (COIPipelineClearCPUMask, 1) (COI_CPU_MASK *in_Mask)
+{
+ COITRACE ("COIPipelineClearCPUMask");
+
+ /* Looks like we have nothing to do here. */
+
+ return COI_SUCCESS; /* Always reports success. */
+}
+
+
+COIRESULT /* Emulator no-op: the inputs are ignored and *out_pMask is left unmodified. */
+SYMBOL_VERSION (COIPipelineSetCPUMask, 1) (COIPROCESS in_Process,
+ uint32_t in_CoreID,
+ uint8_t in_ThreadID,
+ COI_CPU_MASK *out_pMask)
+{
+ COITRACE ("COIPipelineSetCPUMask");
+
+ /* Looks like we have nothing to do here. */
+
+ return COI_SUCCESS; /* Always reports success. */
+}
+
+
+COIRESULT /* Fills *out_pEngineInfo with fixed values describing the emulated engine. */
+SYMBOL_VERSION (COIEngineGetInfo, 1) (COIENGINE in_EngineHandle,
+ uint32_t in_EngineInfoSize,
+ COI_ENGINE_INFO *out_pEngineInfo)
+{
+ COITRACE ("COIEngineGetInfo"); /* in_EngineHandle and in_EngineInfoSize are not consulted below. */
+
+ out_pEngineInfo->ISA = COI_ISA_x86_64; /* NOTE(review): out_pEngineInfo is written without a NULL check - confirm callers. */
+ out_pEngineInfo->NumCores = 1;
+ out_pEngineInfo->NumThreads = 8;
+ out_pEngineInfo->CoreMaxFrequency = SYMBOL_VERSION(COIPerfGetCycleFrequency,1)() / 1000000; /* Presumably Hz -> MHz; confirm units. */
+ out_pEngineInfo->PhysicalMemory = 1024; /* The four memory fields get the same constant; units are not visible here. */
+ out_pEngineInfo->PhysicalMemoryFree = 1024;
+ out_pEngineInfo->SwapMemory = 1024;
+ out_pEngineInfo->SwapMemoryFree = 1024;
+ out_pEngineInfo->MiscFlags = COI_ENG_ECC_DISABLED;
+
+ return COI_SUCCESS;
+}
+
} // extern "C"
diff --git a/liboffloadmic/runtime/emulator/coi_host.h b/liboffloadmic/runtime/emulator/coi_host.h
index 58ebd97ed7ec..82260da9db98 100644
--- a/liboffloadmic/runtime/emulator/coi_host.h
+++ b/liboffloadmic/runtime/emulator/coi_host.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/emulator/coi_version_asm.h b/liboffloadmic/runtime/emulator/coi_version_asm.h
index 672d062d72f0..25806391b29a 100644
--- a/liboffloadmic/runtime/emulator/coi_version_asm.h
+++ b/liboffloadmic/runtime/emulator/coi_version_asm.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -38,31 +38,54 @@
* intellectual property rights is granted herein.
*/
-__asm__ (".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0");
-__asm__ (".symver COIBufferCopy1,COIBufferCopy@@COI_1.0");
-__asm__ (".symver COIBufferCreate1,COIBufferCreate@@COI_1.0");
-__asm__ (".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0");
-__asm__ (".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0");
-__asm__ (".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0");
-__asm__ (".symver COIBufferMap1,COIBufferMap@@COI_1.0");
-__asm__ (".symver COIBufferRead1,COIBufferRead@@COI_1.0");
-__asm__ (".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0");
-__asm__ (".symver COIBufferSetState1,COIBufferSetState@@COI_1.0");
-__asm__ (".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0");
-__asm__ (".symver COIBufferWrite1,COIBufferWrite@@COI_1.0");
-__asm__ (".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0");
-__asm__ (".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0");
-__asm__ (".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0");
-__asm__ (".symver COIEventWait1,COIEventWait@@COI_1.0");
-__asm__ (".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0");
-__asm__ (".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0");
-__asm__ (".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0");
-__asm__ (".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0");
-__asm__ (".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0");
-__asm__ (".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0");
-__asm__ (".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0");
-__asm__ (".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0");
-__asm__ (".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0");
-__asm__ (".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0");
-__asm__ (".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0");
-
+// Originally generated via:
+// cd include;
+// ctags -x --c-kinds=fp -R sink/ source/ common/ | grep -v COIX | awk '{print "__asm__(\".symver "$1"1,"$1"@@COI_1.0\");"}'
+//
+// These directives must have an associated linker script with VERSION stuff.
+// See coi_version_linker_script.map
+// Passed in as
+// -Wl,--version-script coi_version_linker_script.map
+// when building Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
+//
+// See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info
+//
+// This is not strictly a .h file, so no need to #pragma once or anything.
+// You must include these asm directives in the same translation unit as the
+// one where the function body is.
+// Otherwise we'd have to add this file to the list of files needed to build
+// libcoi*, instead of including it in each of the api/*/*cpp files.
+//
+__asm__(".symver COIBufferAddRef1,COIBufferAddRef@@COI_1.0");
+__asm__(".symver COIBufferCopy1,COIBufferCopy@@COI_1.0");
+__asm__(".symver COIBufferCreate1,COIBufferCreate@@COI_1.0");
+__asm__(".symver COIBufferCreateFromMemory1,COIBufferCreateFromMemory@@COI_1.0");
+__asm__(".symver COIBufferDestroy1,COIBufferDestroy@@COI_1.0");
+__asm__(".symver COIBufferGetSinkAddress1,COIBufferGetSinkAddress@@COI_1.0");
+__asm__(".symver COIBufferMap1,COIBufferMap@@COI_1.0");
+__asm__(".symver COIBufferRead1,COIBufferRead@@COI_1.0");
+__asm__(".symver COIBufferReleaseRef1,COIBufferReleaseRef@@COI_1.0");
+__asm__(".symver COIBufferSetState1,COIBufferSetState@@COI_1.0");
+__asm__(".symver COIBufferUnmap1,COIBufferUnmap@@COI_1.0");
+__asm__(".symver COIBufferWrite1,COIBufferWrite@@COI_1.0");
+__asm__(".symver COIEngineGetCount1,COIEngineGetCount@@COI_1.0");
+__asm__(".symver COIEngineGetHandle1,COIEngineGetHandle@@COI_1.0");
+__asm__(".symver COIEngineGetIndex1,COIEngineGetIndex@@COI_1.0");
+__asm__(".symver COIEngineGetInfo1,COIEngineGetInfo@@COI_1.0");
+__asm__(".symver COIEventRegisterCallback1,COIEventRegisterCallback@@COI_1.0");
+__asm__(".symver COIEventWait1,COIEventWait@@COI_1.0");
+__asm__(".symver COIPerfGetCycleFrequency1,COIPerfGetCycleFrequency@@COI_1.0");
+__asm__(".symver COIPipelineClearCPUMask1,COIPipelineClearCPUMask@@COI_1.0");
+__asm__(".symver COIPipelineCreate1,COIPipelineCreate@@COI_1.0");
+__asm__(".symver COIPipelineDestroy1,COIPipelineDestroy@@COI_1.0");
+__asm__(".symver COIPipelineRunFunction1,COIPipelineRunFunction@@COI_1.0");
+__asm__(".symver COIPipelineSetCPUMask1,COIPipelineSetCPUMask@@COI_1.0");
+__asm__(".symver COIPipelineStartExecutingRunFunctions1,COIPipelineStartExecutingRunFunctions@@COI_1.0");
+__asm__(".symver COIProcessCreateFromFile1,COIProcessCreateFromFile@@COI_1.0");
+__asm__(".symver COIProcessCreateFromMemory1,COIProcessCreateFromMemory@@COI_1.0");
+__asm__(".symver COIProcessDestroy1,COIProcessDestroy@@COI_1.0");
+__asm__(".symver COIProcessGetFunctionHandles1,COIProcessGetFunctionHandles@@COI_1.0");
+__asm__(".symver COIProcessLoadLibraryFromMemory2,COIProcessLoadLibraryFromMemory@COI_2.0");
+__asm__(".symver COIProcessRegisterLibraries1,COIProcessRegisterLibraries@@COI_1.0");
+__asm__(".symver COIProcessUnloadLibrary1,COIProcessUnloadLibrary@@COI_1.0");
+__asm__(".symver COIProcessWaitForShutdown1,COIProcessWaitForShutdown@@COI_1.0");
diff --git a/liboffloadmic/runtime/emulator/coi_version_linker_script.map b/liboffloadmic/runtime/emulator/coi_version_linker_script.map
index 496713fb4f7d..a98cbc6e7840 100644
--- a/liboffloadmic/runtime/emulator/coi_version_linker_script.map
+++ b/liboffloadmic/runtime/emulator/coi_version_linker_script.map
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -38,6 +38,12 @@
* intellectual property rights is granted herein.
*/
+/***
+* See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info.
+* Use this in conjunction with coi_version_asm.h.
+* // Comments don't work in this file.
+***/
+
COI_1.0
{
global:
@@ -56,17 +62,23 @@ COI_1.0
COIEngineGetCount;
COIEngineGetHandle;
COIEngineGetIndex;
+ COIEngineGetInfo;
COIEventWait;
+ COIEventRegisterCallback;
COIPerfGetCycleFrequency;
+ COIPipelineClearCPUMask;
COIPipelineCreate;
COIPipelineDestroy;
COIPipelineRunFunction;
+ COIPipelineSetCPUMask;
COIPipelineStartExecutingRunFunctions;
+ COIProcessCreateFromFile;
COIProcessCreateFromMemory;
COIProcessDestroy;
COIProcessGetFunctionHandles;
COIProcessLoadLibraryFromMemory;
COIProcessRegisterLibraries;
+ COIProcessUnloadLibrary;
COIProcessWaitForShutdown;
local:
*;
diff --git a/liboffloadmic/runtime/emulator/myo_client.cpp b/liboffloadmic/runtime/emulator/myo_client.cpp
index bee59f0e1131..d9d5f309ed18 100644
--- a/liboffloadmic/runtime/emulator/myo_client.cpp
+++ b/liboffloadmic/runtime/emulator/myo_client.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/emulator/myo_service.cpp b/liboffloadmic/runtime/emulator/myo_service.cpp
index e18abecd05cd..0473253f731c 100644
--- a/liboffloadmic/runtime/emulator/myo_service.cpp
+++ b/liboffloadmic/runtime/emulator/myo_service.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -155,5 +155,49 @@ SYMBOL_VERSION (myoiTargetFptrTableRegister, 1) (void *table,
return MYO_ERROR;
}
+/* Stub: arena release is not implemented here; any call trips the assert.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoArenaRelease, 1) (MyoArena in_Arena)
+{
+ MYOTRACE ("myoArenaRelease");
+
+ assert (false);
+
+ return MYO_ERROR; /* Reached only when assert is compiled out (NDEBUG).  */
+}
+
+/* Stub: arena acquire is not implemented here; any call trips the assert.  */
+MYOACCESSAPI MyoError
+SYMBOL_VERSION (myoArenaAcquire, 1) (MyoArena in_Arena)
+{
+ MYOTRACE ("myoArenaAcquire");
+
+ assert (false);
+
+ return MYO_ERROR; /* Reached only when assert is compiled out (NDEBUG).  */
+}
+
+/* Stub: arena aligned free is not implemented here; any call trips the assert.  */
+MYOACCESSAPI void
+SYMBOL_VERSION (myoArenaAlignedFree, 1) (MyoArena in_Arena, void *in_pPtr)
+{
+ MYOTRACE ("myoArenaAlignedFree");
+
+ assert (false); /* With assert compiled out (NDEBUG) nothing is freed.  */
+}
+
+/* Stub: arena aligned malloc is not implemented here; any call trips the assert.  */
+MYOACCESSAPI void *
+SYMBOL_VERSION (myoArenaAlignedMalloc, 1) (MyoArena in_Arena, size_t in_Size,
+ size_t in_Alignment)
+{
+ MYOTRACE ("myoArenaAlignedMalloc");
+
+ assert (false);
+
+ return 0; /* Null result; reached only when assert is compiled out (NDEBUG).  */
+}
+
+
} // extern "C"
diff --git a/liboffloadmic/runtime/emulator/myo_service.h b/liboffloadmic/runtime/emulator/myo_service.h
index 776e8c2c40d7..ffa4a5f8dcd8 100644
--- a/liboffloadmic/runtime/emulator/myo_service.h
+++ b/liboffloadmic/runtime/emulator/myo_service.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/emulator/myo_version_asm.h b/liboffloadmic/runtime/emulator/myo_version_asm.h
index 2bd8302ab6a7..f4db3ca4ab74 100644
--- a/liboffloadmic/runtime/emulator/myo_version_asm.h
+++ b/liboffloadmic/runtime/emulator/myo_version_asm.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -38,16 +38,24 @@
* intellectual property rights is granted herein.
*/
-__asm__ (".symver myoAcquire1,myoAcquire@@MYO_1.0");
-__asm__ (".symver myoRelease1,myoRelease@@MYO_1.0");
-__asm__ (".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
-__asm__ (".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
-__asm__ (".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
-__asm__ (".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
+/* Symbol versioning (only functions are versioned at present).
+   Only the Linux host-side code is versioned currently.  */
+#if (! defined MYO_MIC_CARD) && (! defined _WIN32)
-__asm__ (".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
-__asm__ (".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
-__asm__ (".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
-__asm__ (".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
-__asm__ (".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
+ __asm__(".symver myoArenaAlignedMalloc1,myoArenaAlignedMalloc@@MYO_1.0");
+ __asm__(".symver myoArenaAlignedFree1,myoArenaAlignedFree@@MYO_1.0");
+ __asm__(".symver myoArenaAcquire1,myoArenaAcquire@@MYO_1.0");
+ __asm__(".symver myoArenaRelease1,myoArenaRelease@@MYO_1.0");
+ __asm__(".symver myoAcquire1,myoAcquire@@MYO_1.0");
+ __asm__(".symver myoRelease1,myoRelease@@MYO_1.0");
+ __asm__(".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
+ __asm__(".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
+ __asm__(".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
+ __asm__(".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
+ __asm__(".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
+ __asm__(".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
+ __asm__(".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
+ __asm__(".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
+ __asm__(".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
+#endif
diff --git a/liboffloadmic/runtime/emulator/myo_version_linker_script.map b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
index 361b289d1b65..8f065bbb4ac4 100644
--- a/liboffloadmic/runtime/emulator/myo_version_linker_script.map
+++ b/liboffloadmic/runtime/emulator/myo_version_linker_script.map
@@ -1,5 +1,5 @@
/*
- * Copyright 2010-2013 Intel Corporation.
+ * Copyright 2010-2015 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -38,9 +38,17 @@
* intellectual property rights is granted herein.
*/
+/***
+* See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info.
+***/
+
MYO_1.0
{
global:
+ myoArenaAlignedMalloc;
+ myoArenaAlignedFree;
+ myoArenaAcquire;
+ myoArenaRelease;
myoAcquire;
myoRelease;
myoSharedAlignedFree;
diff --git a/liboffloadmic/runtime/liboffload_error.c b/liboffloadmic/runtime/liboffload_error.c
index eb5699d3c5cc..29dcb6c8507c 100644
--- a/liboffloadmic/runtime/liboffload_error.c
+++ b/liboffloadmic/runtime/liboffload_error.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -144,6 +144,9 @@ void __liboffload_error_support(error_types input_tag, ...)
case c_process_create:
write_message(stderr, msg_c_process_create, args);
break;
+ case c_process_set_cache_size:
+ write_message(stderr, msg_c_process_set_cache_size, args);
+ break;
case c_process_wait_shutdown:
write_message(stderr, msg_c_process_wait_shutdown, args);
break;
@@ -216,6 +219,9 @@ void __liboffload_error_support(error_types input_tag, ...)
case c_zero_or_neg_transfer_size:
write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
break;
+ case c_bad_ptr_mem_alloc:
+ write_message(stderr, msg_c_bad_ptr_mem_alloc, args);
+ break;
case c_bad_ptr_mem_range:
write_message(stderr, msg_c_bad_ptr_mem_range, args);
break;
@@ -258,6 +264,39 @@ void __liboffload_error_support(error_types input_tag, ...)
case c_report_unknown_trace_node:
write_message(stderr, msg_c_report_unknown_trace_node, args);
break;
+ case c_incorrect_affinity:
+ write_message(stderr, msg_c_incorrect_affinity, args);
+ break;
+ case c_cannot_set_affinity:
+ write_message(stderr, msg_c_cannot_set_affinity, args);
+ break;
+ case c_in_with_preallocated:
+ write_message(stderr, msg_c_in_with_preallocated, args);
+ break;
+ case c_report_no_host_exe:
+ write_message(stderr, msg_c_report_no_host_exe, args);
+ break;
+ case c_report_path_buff_overflow:
+ write_message(stderr, msg_c_report_path_buff_overflow, args);
+ break;
+ case c_create_pipeline_for_stream:
+ write_message(stderr, msg_c_create_pipeline_for_stream, args);
+ break;
+ case c_offload_no_stream:
+ write_message(stderr, msg_c_offload_no_stream, args);
+ break;
+ case c_get_engine_info:
+ write_message(stderr, msg_c_get_engine_info, args);
+ break;
+ case c_clear_cpu_mask:
+ write_message(stderr, msg_c_clear_cpu_mask, args);
+ break;
+ case c_set_cpu_mask:
+ write_message(stderr, msg_c_set_cpu_mask, args);
+ break;
+ case c_unload_library:
+ write_message(stderr, msg_c_unload_library, args);
+ break;
}
va_end(args);
}
@@ -374,6 +413,10 @@ char const * report_get_message_str(error_types input_tag)
return (offload_get_message_str(msg_c_report_unregister));
case c_report_var:
return (offload_get_message_str(msg_c_report_var));
+ case c_report_stream:
+ return (offload_get_message_str(msg_c_report_stream));
+ case c_report_state_stream:
+ return (offload_get_message_str(msg_c_report_state_stream));
default:
LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
diff --git a/liboffloadmic/runtime/liboffload_error_codes.h b/liboffloadmic/runtime/liboffload_error_codes.h
index c33bef562365..d580851271a6 100644
--- a/liboffloadmic/runtime/liboffload_error_codes.h
+++ b/liboffloadmic/runtime/liboffload_error_codes.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -68,6 +68,7 @@ typedef enum
c_get_engine_handle,
c_get_engine_index,
c_process_create,
+ c_process_set_cache_size,
c_process_get_func_handles,
c_process_wait_shutdown,
c_process_proxy_flush,
@@ -91,6 +92,7 @@ typedef enum
c_event_wait,
c_zero_or_neg_ptr_len,
c_zero_or_neg_transfer_size,
+ c_bad_ptr_mem_alloc,
c_bad_ptr_mem_range,
c_different_src_and_dstn_sizes,
c_ranges_dont_match,
@@ -103,6 +105,8 @@ typedef enum
c_unknown_binary_type,
c_multiple_target_exes,
c_no_target_exe,
+ c_incorrect_affinity,
+ c_cannot_set_affinity,
c_report_host,
c_report_target,
c_report_title,
@@ -159,7 +163,24 @@ typedef enum
c_report_myosharedalignedfree,
c_report_myoacquire,
c_report_myorelease,
- c_coipipe_max_number
+ c_report_myosupportsfeature,
+ c_report_myosharedarenacreate,
+ c_report_myosharedalignedarenamalloc,
+ c_report_myosharedalignedarenafree,
+ c_report_myoarenaacquire,
+ c_report_myoarenarelease,
+ c_coipipe_max_number,
+ c_in_with_preallocated,
+ c_report_no_host_exe,
+ c_report_path_buff_overflow,
+ c_create_pipeline_for_stream,
+ c_offload_no_stream,
+ c_get_engine_info,
+ c_clear_cpu_mask,
+ c_set_cpu_mask,
+ c_report_state_stream,
+ c_report_stream,
+ c_unload_library
} error_types;
enum OffloadHostPhase {
@@ -260,15 +281,21 @@ enum OffloadTargetPhase {
c_offload_target_max_phase
};
+#ifdef TARGET_WINNT /* DLL_LOCAL expands to nothing in TARGET_WINNT builds.  */
+ #define DLL_LOCAL
+#else /* Otherwise DLL_LOCAL gives a declaration hidden symbol visibility.  */
+ #define DLL_LOCAL __attribute__((visibility("hidden")))
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
-void __liboffload_error_support(error_types input_tag, ...);
-void __liboffload_report_support(error_types input_tag, ...);
-char const *offload_get_message_str(int msgCode);
-char const * report_get_message_str(error_types input_tag);
-char const * report_get_host_stage_str(int i);
-char const * report_get_target_stage_str(int i);
+DLL_LOCAL void __liboffload_error_support(error_types input_tag, ...);
+DLL_LOCAL void __liboffload_report_support(error_types input_tag, ...);
+DLL_LOCAL char const *offload_get_message_str(int msgCode);
+DLL_LOCAL char const * report_get_message_str(error_types input_tag);
+DLL_LOCAL char const * report_get_host_stage_str(int i);
+DLL_LOCAL char const * report_get_target_stage_str(int i);
#ifdef __cplusplus
}
#endif
@@ -281,7 +308,7 @@ char const * report_get_target_stage_str(int i);
fprintf(stderr, "\t TEST for %s \n \t", nm); \
__liboffload_error_support(msg, __VA_ARGS__);
-void write_message(FILE * file, int msgCode, va_list args_p);
+DLL_LOCAL void write_message(FILE * file, int msgCode, va_list args_p);
#define LIBOFFLOAD_ERROR __liboffload_error_support
diff --git a/liboffloadmic/runtime/liboffload_msg.c b/liboffloadmic/runtime/liboffload_msg.c
index c6d9fa7db1e4..3d3784c5eda7 100644
--- a/liboffloadmic/runtime/liboffload_msg.c
+++ b/liboffloadmic/runtime/liboffload_msg.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,7 +28,6 @@
*/
-
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -55,7 +54,7 @@
va_copy(args, args_p);
buf[0] = '\n';
vsnprintf(buf + 1, sizeof(buf) - 2,
- MESSAGE_TABLE_NAME[ msgCode ], args);
+ MESSAGE_TABLE_NAME[ msgCode ], args);
strcat(buf, "\n");
va_end(args);
fputs(buf, file);
diff --git a/liboffloadmic/runtime/liboffload_msg.h b/liboffloadmic/runtime/liboffload_msg.h
index e43b6b63551b..f1bae7712e3d 100644
--- a/liboffloadmic/runtime/liboffload_msg.h
+++ b/liboffloadmic/runtime/liboffload_msg.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -62,6 +62,7 @@ enum {
msg_c_get_engine_handle,
msg_c_get_engine_index,
msg_c_process_create,
+ msg_c_process_set_cache_size,
msg_c_process_get_func_handles,
msg_c_process_wait_shutdown,
msg_c_process_proxy_flush,
@@ -85,6 +86,7 @@ enum {
msg_c_event_wait,
msg_c_zero_or_neg_ptr_len,
msg_c_zero_or_neg_transfer_size,
+ msg_c_bad_ptr_mem_alloc,
msg_c_bad_ptr_mem_range,
msg_c_different_src_and_dstn_sizes,
msg_c_non_contiguous_dope_vector,
@@ -95,6 +97,8 @@ enum {
msg_c_no_target_exe,
msg_c_report_unknown_timer_node,
msg_c_report_unknown_trace_node,
+ msg_c_incorrect_affinity,
+ msg_c_cannot_set_affinity,
msg_c_report_host,
msg_c_report_mic,
msg_c_report_title,
@@ -148,6 +152,12 @@ enum {
msg_c_report_myosharedalignedfree,
msg_c_report_myoacquire,
msg_c_report_myorelease,
+ msg_c_report_myosupportsfeature,
+ msg_c_report_myosharedarenacreate,
+ msg_c_report_myosharedalignedarenamalloc,
+ msg_c_report_myosharedalignedarenafree,
+ msg_c_report_myoarenaacquire,
+ msg_c_report_myoarenarelease,
msg_c_report_host_total_offload_time,
msg_c_report_host_initialize,
msg_c_report_host_target_acquire,
@@ -182,7 +192,18 @@ enum {
msg_c_destination_is_over,
msg_c_slice_of_noncont_array,
msg_c_pointer_array_mismatch,
- lastMsg = 153,
+ msg_c_in_with_preallocated,
+ msg_c_report_no_host_exe,
+ msg_c_report_path_buff_overflow,
+ msg_c_create_pipeline_for_stream,
+ msg_c_offload_no_stream,
+ msg_c_get_engine_info,
+ msg_c_clear_cpu_mask,
+ msg_c_set_cpu_mask,
+ msg_c_report_state_stream,
+ msg_c_report_stream,
+ msg_c_unload_library,
+ lastMsg = 174,
firstMsg = 1
};
@@ -192,157 +213,178 @@ enum {
#endif
static char const * MESSAGE_TABLE_NAME[] = {
- /* 0 __dummy__ */ "Un-used message",
- /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
- /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
- /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
- /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
- /* 5 msg_c_malloc */ "offload error: memory allocation failed",
- /* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
- /* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
- /* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
- /* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
- /* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
- /* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
- /* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
- /* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
- /* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
- /* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
- /* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
- /* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
- /* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
- /* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
- /* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with \'=\'",
- /* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
- /* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
- /* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
- /* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
- /* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
- /* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
- /* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
- /* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
- /* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
- /* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
- /* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
- /* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
- /* 33 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
- /* 34 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
- /* 35 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
- /* 36 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
- /* 37 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
- /* 38 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
- /* 39 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
- /* 40 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
- /* 41 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
- /* 42 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
- /* 43 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
- /* 44 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
- /* 45 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
- /* 46 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
- /* 47 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
- /* 48 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
- /* 49 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
- /* 50 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
- /* 51 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
- /* 52 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
- /* 53 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
- /* 54 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of negative length is not supported",
- /* 55 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
- /* 56 msg_c_bad_ptr_mem_range */ "offload error: address range partially overlaps with existing allocation",
- /* 57 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
- /* 58 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
- /* 59 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
- /* 60 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
- /* 61 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
- /* 62 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
- /* 63 msg_c_no_target_exe */ "offload error: target executable is not available",
- /* 64 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
- /* 65 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
- /* 66 msg_c_report_host */ "HOST",
- /* 67 msg_c_report_mic */ "MIC",
- /* 68 msg_c_report_title */ "timer data (sec)",
- /* 69 msg_c_report_seconds */ "(seconds)",
- /* 70 msg_c_report_bytes */ "(bytes)",
- /* 71 msg_c_report_cpu_time */ "CPU Time",
- /* 72 msg_c_report_mic_time */ "MIC Time",
- /* 73 msg_c_report_tag */ "Tag",
- /* 74 msg_c_report_from_file */ "Offload from file",
- /* 75 msg_c_report_file */ "File",
- /* 76 msg_c_report_line */ "Line",
- /* 77 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
- /* 78 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
- /* 79 msg_c_report_offload */ "Offload",
- /* 80 msg_c_report_w_tag */ "Tag %d",
- /* 81 msg_c_report_state */ "State",
- /* 82 msg_c_report_start */ "Start target",
- /* 83 msg_c_report_init */ "Initialize",
- /* 84 msg_c_report_logical_card */ "logical card",
- /* 85 msg_c_report_physical_card */ "physical card",
- /* 86 msg_c_report_register */ "Register static data tables",
- /* 87 msg_c_report_init_func */ "Setup target entry",
- /* 88 msg_c_report_create_buf_host */ "Create host buffer",
- /* 89 msg_c_report_create_buf_mic */ "Create target buffer",
- /* 90 msg_c_report_send_pointer_data */ "Send pointer data",
- /* 91 msg_c_report_sent_pointer_data */ "Host->target pointer data",
- /* 92 msg_c_report_gather_copyin_data */ "Gather copyin data",
- /* 93 msg_c_report_copyin_data */ "Host->target copyin data",
- /* 94 msg_c_report_state_signal */ "Signal",
- /* 95 msg_c_report_signal */ "signal :",
- /* 96 msg_c_report_wait */ "waits :",
- /* 97 msg_c_report_compute */ "Execute task on target",
- /* 98 msg_c_report_receive_pointer_data */ "Receive pointer data",
- /* 99 msg_c_report_received_pointer_data */ "Target->host pointer data",
- /* 100 msg_c_report_start_target_func */ "Start target entry",
- /* 101 msg_c_report_var */ "Var",
- /* 102 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
- /* 103 msg_c_report_gather_copyout_data */ "Gather copyout data",
- /* 104 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
- /* 105 msg_c_report_copyout_data */ "Target->host copyout data",
- /* 106 msg_c_report_unregister */ "Unregister data tables",
- /* 107 msg_c_report_destroy */ "Destroy",
- /* 108 msg_c_report_myoinit */ "Initialize MYO",
- /* 109 msg_c_report_myoregister */ "Register MYO tables",
- /* 110 msg_c_report_myofini */ "Finalize MYO",
- /* 111 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
- /* 112 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
- /* 113 msg_c_report_myosharedmalloc */ "MYO shared malloc",
- /* 114 msg_c_report_myosharedfree */ "MYO shared free",
- /* 115 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
- /* 116 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
- /* 117 msg_c_report_myoacquire */ "MYO acquire",
- /* 118 msg_c_report_myorelease */ "MYO release",
- /* 119 msg_c_report_host_total_offload_time */ "host: total offload time",
- /* 120 msg_c_report_host_initialize */ "host: initialize target",
- /* 121 msg_c_report_host_target_acquire */ "host: acquire target",
- /* 122 msg_c_report_host_wait_deps */ "host: wait dependencies",
- /* 123 msg_c_report_host_setup_buffers */ "host: setup buffers",
- /* 124 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
- /* 125 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
- /* 126 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
- /* 127 msg_c_report_host_send_pointers */ "host: send pointers",
- /* 128 msg_c_report_host_gather_inputs */ "host: gather inputs",
- /* 129 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
- /* 130 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
- /* 131 msg_c_report_host_start_compute */ "host: initiate compute",
- /* 132 msg_c_report_host_wait_compute */ "host: wait compute",
- /* 133 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
- /* 134 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
- /* 135 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
- /* 136 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
- /* 137 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
- /* 138 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
- /* 139 msg_c_report_target_total_time */ "target: total time",
- /* 140 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
- /* 141 msg_c_report_target_func_lookup */ "target: entry lookup",
- /* 142 msg_c_report_target_func_time */ "target: entry time",
- /* 143 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
- /* 144 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
- /* 145 msg_c_report_target_compute */ "target: compute",
- /* 146 msg_c_report_target_gather_outputs */ "target: gather outputs",
- /* 147 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
- /* 148 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
- /* 149 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
- /* 150 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
- /* 151 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
- /* 152 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
+ /* 0 __dummy__ */ "Un-used message",
+ /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
+ /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
+ /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
+ /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
+ /* 5 msg_c_malloc */ "offload error: memory allocation failed",
+ /* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
+ /* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
+ /* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
+ /* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
+ /* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
+ /* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
+ /* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
+ /* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
+ /* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
+ /* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
+ /* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
+ /* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
+ /* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
+ /* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
+ /* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with '='",
+ /* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
+ /* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
+ /* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
+ /* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
+ /* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
+ /* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
+ /* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
+ /* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
+ /* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
+ /* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
+ /* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
+ /* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
+ /* 33 msg_c_process_set_cache_size */ "offload error: cannot reserve buffer on the device %d (error code %d)",
+ /* 34 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
+ /* 35 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
+ /* 36 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
+ /* 37 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
+ /* 38 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
+ /* 39 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
+ /* 40 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
+ /* 41 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
+ /* 42 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
+ /* 43 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
+ /* 44 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
+ /* 45 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
+ /* 46 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
+ /* 47 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
+ /* 48 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
+ /* 49 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
+ /* 50 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
+ /* 51 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
+ /* 52 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
+ /* 53 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
+ /* 54 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
+ /* 55 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of zero or negative length is not supported",
+ /* 56 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
+ /* 57 msg_c_bad_ptr_mem_alloc */ "offload error: allocation (base=%p, size=%d) overlaps with existing allocation (base=%p, size=%d)",
+ /* 58 msg_c_bad_ptr_mem_range */ "offload error: data transfer (base=%p, size=%d) not subset of existing allocation (base=%p, size=%d)",
+ /* 59 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
+ /* 60 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
+ /* 61 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
+ /* 62 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
+ /* 63 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
+ /* 64 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
+ /* 65 msg_c_no_target_exe */ "offload error: target executable is not available",
+ /* 66 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
+ /* 67 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
+ /* 68 msg_c_incorrect_affinity */ "offload error: unknow affinity type %s, specify compact, scatter or balanced",
+ /* 69 msg_c_cannot_set_affinity */ "offload_error: unable to set affinity",
+ /* 70 msg_c_report_host */ "HOST",
+ /* 71 msg_c_report_mic */ "MIC",
+ /* 72 msg_c_report_title */ "timer data (sec)",
+ /* 73 msg_c_report_seconds */ "(seconds)",
+ /* 74 msg_c_report_bytes */ "(bytes)",
+ /* 75 msg_c_report_cpu_time */ "CPU Time",
+ /* 76 msg_c_report_mic_time */ "MIC Time",
+ /* 77 msg_c_report_tag */ "Tag",
+ /* 78 msg_c_report_from_file */ "Offload from file",
+ /* 79 msg_c_report_file */ "File",
+ /* 80 msg_c_report_line */ "Line",
+ /* 81 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
+ /* 82 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
+ /* 83 msg_c_report_offload */ "Offload",
+ /* 84 msg_c_report_w_tag */ "Tag %d",
+ /* 85 msg_c_report_state */ "State",
+ /* 86 msg_c_report_start */ "Start target",
+ /* 87 msg_c_report_init */ "Initialize",
+ /* 88 msg_c_report_logical_card */ "logical card",
+ /* 89 msg_c_report_physical_card */ "physical card",
+ /* 90 msg_c_report_register */ "Register static data tables",
+ /* 91 msg_c_report_init_func */ "Setup target entry",
+ /* 92 msg_c_report_create_buf_host */ "Create host buffer",
+ /* 93 msg_c_report_create_buf_mic */ "Create target buffer",
+ /* 94 msg_c_report_send_pointer_data */ "Send pointer data",
+ /* 95 msg_c_report_sent_pointer_data */ "Host->target pointer data",
+ /* 96 msg_c_report_gather_copyin_data */ "Gather copyin data",
+ /* 97 msg_c_report_copyin_data */ "Host->target copyin data",
+ /* 98 msg_c_report_state_signal */ "Signal",
+ /* 99 msg_c_report_signal */ "signal :",
+ /* 100 msg_c_report_wait */ "waits :",
+ /* 101 msg_c_report_compute */ "Execute task on target",
+ /* 102 msg_c_report_receive_pointer_data */ "Receive pointer data",
+ /* 103 msg_c_report_received_pointer_data */ "Target->host pointer data",
+ /* 104 msg_c_report_start_target_func */ "Start target entry",
+ /* 105 msg_c_report_var */ "Var",
+ /* 106 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
+ /* 107 msg_c_report_gather_copyout_data */ "Gather copyout data",
+ /* 108 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
+ /* 109 msg_c_report_copyout_data */ "Target->host copyout data",
+ /* 110 msg_c_report_unregister */ "Unregister data tables",
+ /* 111 msg_c_report_destroy */ "Destroy",
+ /* 112 msg_c_report_myoinit */ "Initialize MYO",
+ /* 113 msg_c_report_myoregister */ "Register MYO tables",
+ /* 114 msg_c_report_myofini */ "Finalize MYO",
+ /* 115 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
+ /* 116 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
+ /* 117 msg_c_report_myosharedmalloc */ "MYO shared malloc",
+ /* 118 msg_c_report_myosharedfree */ "MYO shared free",
+ /* 119 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
+ /* 120 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
+ /* 121 msg_c_report_myoacquire */ "MYO acquire",
+ /* 122 msg_c_report_myorelease */ "MYO release",
+ /* 123 msg_c_report_myosupportsfeature */ "MYO supports feature",
+ /* 124 msg_c_report_myosharedarenacreate */ "MYO shared arena create",
+ /* 125 msg_c_report_myosharedalignedarenamalloc */ "MYO shared aligned arena malloc",
+ /* 126 msg_c_report_myosharedalignedarenafree */ "MYO shared aligned arena free",
+ /* 127 msg_c_report_myoarenaacquire */ "MYO arena acquire",
+ /* 128 msg_c_report_myoarenarelease */ "MYO arena release",
+ /* 129 msg_c_report_host_total_offload_time */ "host: total offload time",
+ /* 130 msg_c_report_host_initialize */ "host: initialize target",
+ /* 131 msg_c_report_host_target_acquire */ "host: acquire target",
+ /* 132 msg_c_report_host_wait_deps */ "host: wait dependencies",
+ /* 133 msg_c_report_host_setup_buffers */ "host: setup buffers",
+ /* 134 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
+ /* 135 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
+ /* 136 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
+ /* 137 msg_c_report_host_send_pointers */ "host: send pointers",
+ /* 138 msg_c_report_host_gather_inputs */ "host: gather inputs",
+ /* 139 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
+ /* 140 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
+ /* 141 msg_c_report_host_start_compute */ "host: initiate compute",
+ /* 142 msg_c_report_host_wait_compute */ "host: wait compute",
+ /* 143 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
+ /* 144 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
+ /* 145 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
+ /* 146 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
+ /* 147 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
+ /* 148 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
+ /* 149 msg_c_report_target_total_time */ "target: total time",
+ /* 150 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
+ /* 151 msg_c_report_target_func_lookup */ "target: entry lookup",
+ /* 152 msg_c_report_target_func_time */ "target: entry time",
+ /* 153 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
+ /* 154 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
+ /* 155 msg_c_report_target_compute */ "target: compute",
+ /* 156 msg_c_report_target_gather_outputs */ "target: gather outputs",
+ /* 157 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
+ /* 158 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
+ /* 159 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
+ /* 160 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
+ /* 161 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
+ /* 162 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
+ /* 163 msg_c_in_with_preallocated */ "offload error: preallocated targetptr alloc_if(1) may not be used with an in clause",
+ /* 164 msg_c_report_no_host_exe */ "offload error: Cannot find host executable",
+ /* 165 msg_c_report_path_buff_overflow */ "offload error: Size of host executable path exceeded 4KB",
+ /* 166 msg_c_create_pipeline_for_stream */ "offload error: number of cpus exceeds maximum of %d",
+ /* 167 msg_c_offload_no_stream */ "offload error: the stream isn't found on device %d",
+ /* 168 msg_c_get_engine_info */ "offload error: cannot get device %d info (error code %d)",
+ /* 169 msg_c_clear_cpu_mask */ "offload error: cannot clear cpu mask (error code %d)",
+ /* 170 msg_c_set_cpu_mask */ "offload error: cannot set cpu mask (error code %d)",
+ /* 171 msg_c_report_state_stream */ "Stream",
+ /* 172 msg_c_report_stream */ "stream :",
+ /* 173 msg_c_unload_library */ "offload error: cannot unload library from the device %d (error code %d)",
};
diff --git a/liboffloadmic/runtime/mic_lib.f90 b/liboffloadmic/runtime/mic_lib.f90
index c68e059aa19a..1431716777e6 100644
--- a/liboffloadmic/runtime/mic_lib.f90
+++ b/liboffloadmic/runtime/mic_lib.f90
@@ -1,5 +1,5 @@
!
-! Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+! Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload.h b/liboffloadmic/runtime/offload.h
index 9234b0011f9d..5ee06fe24c07 100644
--- a/liboffloadmic/runtime/offload.h
+++ b/liboffloadmic/runtime/offload.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -42,6 +42,13 @@
#include <stddef.h>
#include <omp.h>
+#ifdef TARGET_WINNT
+// <stdint.h> is not compatible with Windows
+typedef unsigned long long int uint64_t;
+#else
+#include <stdint.h>
+#endif // TARGET_WINNT
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -86,6 +93,8 @@ typedef struct {
size_t data_received; /* number of bytes received by host */
} _Offload_status;
+typedef uint64_t _Offload_stream;
+
#define OFFLOAD_STATUS_INIT(x) \
((x).result = OFFLOAD_DISABLED)
@@ -98,14 +107,57 @@ extern int _Offload_number_of_devices(void);
extern int _Offload_get_device_number(void);
extern int _Offload_get_physical_device_number(void);
+/* Offload stream runtime interfaces */
+
+extern _Offload_stream _Offload_stream_create(
+ int device, // MIC device number
+ int number_of_cpus // Cores allocated to the stream
+);
+
+extern int _Offload_stream_destroy(
+ int device, // MIC device number
+ _Offload_stream stream // stream handle
+);
+
+extern int _Offload_stream_completed(
+ int device, // MIC device number
+ _Offload_stream handle // stream handle
+);
+
+/*
+ * _Offload_shared_malloc/free are only supported when offload is enabled
+ * else they are defined to malloc and free.
+ *
+ * The fallback macros deliberately carry no trailing semicolon so that they
+ * expand to a single expression and can be used exactly like the functions
+ * they stand in for (e.g. as the body of an if/else without braces).
+*/
+#ifdef __INTEL_OFFLOAD
extern void* _Offload_shared_malloc(size_t size);
extern void _Offload_shared_free(void *ptr);
-
extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
extern void _Offload_shared_aligned_free(void *ptr);
+#else
+#include <malloc.h>
+#define _Offload_shared_malloc(size) malloc(size)
+#define _Offload_shared_free(ptr) free(ptr)
+#if defined(_WIN32)
+#define _Offload_shared_aligned_malloc(size, align) _aligned_malloc(size, align)
+#define _Offload_shared_aligned_free(ptr) _aligned_free(ptr)
+#else
+#define _Offload_shared_aligned_malloc(size, align) memalign(align, size)
+#define _Offload_shared_aligned_free(ptr) free(ptr)
+#endif
+#endif
+
extern int _Offload_signaled(int index, void *signal);
extern void _Offload_report(int val);
+extern int _Offload_find_associated_mic_memory(
+ int target,
+ const void* cpu_addr,
+ void** cpu_base_addr,
+ uint64_t* buf_length,
+ void** mic_addr,
+ uint64_t* mic_buf_start_offset,
+ int* is_static
+);
/* OpenMP API */
@@ -343,7 +395,11 @@ namespace __offload {
shared_allocator<void>::const_pointer) {
/* Allocate from shared memory. */
void *ptr = _Offload_shared_malloc(s*sizeof(T));
- if (ptr == 0) std::__throw_bad_alloc();
+#if (defined(_WIN32) || defined(_WIN64)) /* Windows */
+ if (ptr == 0) throw std::bad_alloc();
+#else
+ if (ptr == 0) std::__throw_bad_alloc();
+#endif
return static_cast<pointer>(ptr);
} /* allocate */
@@ -355,13 +411,13 @@ namespace __offload {
} /* deallocate */
template <typename _T1, typename _T2>
- inline bool operator==(const shared_allocator<_T1> &,
+ inline bool operator==(const shared_allocator<_T1> &,
const shared_allocator<_T2> &) throw() {
return true;
} /* operator== */
template <typename _T1, typename _T2>
- inline bool operator!=(const shared_allocator<_T1> &,
+ inline bool operator!=(const shared_allocator<_T1> &,
const shared_allocator<_T2> &) throw() {
return false;
} /* operator!= */
diff --git a/liboffloadmic/runtime/offload_common.cpp b/liboffloadmic/runtime/offload_common.cpp
index 72c355f6f183..200def566c59 100644
--- a/liboffloadmic/runtime/offload_common.cpp
+++ b/liboffloadmic/runtime/offload_common.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_common.h b/liboffloadmic/runtime/offload_common.h
index 60b5045b9b56..0fb66b553b73 100644
--- a/liboffloadmic/runtime/offload_common.h
+++ b/liboffloadmic/runtime/offload_common.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -40,10 +40,6 @@
#include <string.h>
#include <memory.h>
-#if (defined(LINUX) || defined(FREEBSD)) && !defined(__INTEL_COMPILER)
-#include <mm_malloc.h>
-#endif
-
#include "offload.h"
#include "offload_table.h"
#include "offload_trace.h"
@@ -65,22 +61,24 @@
// The debug routines
// Host console and file logging
-extern int console_enabled;
-extern int offload_report_level;
+DLL_LOCAL extern int console_enabled;
+DLL_LOCAL extern int offload_report_level;
-#define OFFLOAD_DO_TRACE (offload_report_level == 3)
-extern const char *prefix;
-extern int offload_number;
+DLL_LOCAL extern const char *prefix;
+DLL_LOCAL extern int offload_number;
#if !HOST_LIBRARY
-extern int mic_index;
+DLL_LOCAL extern int mic_index;
+#define OFFLOAD_DO_TRACE (offload_report_level == 3)
+#else
+#define OFFLOAD_DO_TRACE (offload_report_enabled && (offload_report_level == 3))
#endif
#if HOST_LIBRARY
-void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
-void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
-void offload_report_free_data(OffloadHostTimerData * timer_data);
-void Offload_Timer_Print(void);
+DLL_LOCAL void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
+DLL_LOCAL void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
+DLL_LOCAL void offload_report_free_data(OffloadHostTimerData * timer_data);
+DLL_LOCAL void Offload_Timer_Print(void);
#ifndef TARGET_WINNT
#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
@@ -130,7 +128,7 @@ void Offload_Timer_Print(void);
#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
__dump_bytes(level, a, b)
-extern void __dump_bytes(
+DLL_LOCAL extern void __dump_bytes(
int level,
const void *data,
int len
@@ -156,6 +154,17 @@ extern void *OFFLOAD_MALLOC(size_t size, size_t align);
// The Marshaller
+//! Flags describing an offload.
+//! The same storage can be accessed as one 32-bit word through `flags`
+//! or flag by flag through the `bits` bit-fields.
+union OffloadFlags{
+ uint32_t flags; //!< All flags viewed as a single 32-bit word
+ struct {
+ uint32_t fortran_traceback : 1; //!< Fortran traceback requested
+ uint32_t omp_async : 1; //!< OpenMP asynchronous offload
+ } bits;
+};
+
//! \enum Indicator for the type of entry on an offload item list.
enum OffloadItemType {
c_data = 1, //!< Plain data
@@ -203,6 +212,44 @@ enum OffloadParameterType {
c_parameter_inout //!< Variable listed in "inout" clause
};
+
+//! Flags describing an offloaded variable.
+//! The one-bit flags in the anonymous struct share storage with the
+//! 32-bit word `bits`, so all of them can be read or written at once
+//! through `bits`.
+union varDescFlags {
+ struct {
+ //! source variable has persistent storage
+ uint32_t is_static : 1;
+ //! destination variable has persistent storage
+ uint32_t is_static_dstn : 1;
+ //! has length for c_dv && c_dv_ptr
+ uint32_t has_length : 1;
+ //! persisted local scalar is in stack buffer
+ uint32_t is_stack_buf : 1;
+ //! "targetptr" modifier used
+ uint32_t targetptr : 1;
+ //! "preallocated" modifier used
+ uint32_t preallocated : 1;
+ //! NOTE(review): purpose is not documented here; presumably marks
+ //! the variable as a pointer -- confirm before relying on it
+ uint32_t is_pointer : 1;
+
+ //! buffer address is sent in data
+ uint32_t sink_addr : 1;
+ //! alloc displacement is sent in data
+ uint32_t alloc_disp : 1;
+ //! source data is noncontiguous
+ uint32_t is_noncont_src : 1;
+ //! destination data is noncontiguous
+ uint32_t is_noncont_dst : 1;
+
+ //! "OpenMP always" modifier used
+ uint32_t always_copy : 1;
+ //! "OpenMP delete" modifier used
+ uint32_t always_delete : 1;
+ //! CPU memory pinning/unpinning operation
+ uint32_t pin : 1;
+ };
+ //! all flags viewed as a single 32-bit word
+ uint32_t bits;
+};
+
//! An Offload Variable descriptor
struct VarDesc {
//! OffloadItemTypes of source and destination
@@ -230,27 +277,7 @@ struct VarDesc {
/*! Used by runtime as offset to data from start of MIC buffer */
uint32_t mic_offset;
//! Flags describing this variable
- union {
- struct {
- //! source variable has persistent storage
- uint32_t is_static : 1;
- //! destination variable has persistent storage
- uint32_t is_static_dstn : 1;
- //! has length for c_dv && c_dv_ptr
- uint32_t has_length : 1;
- //! persisted local scalar is in stack buffer
- uint32_t is_stack_buf : 1;
- //! buffer address is sent in data
- uint32_t sink_addr : 1;
- //! alloc displacement is sent in data
- uint32_t alloc_disp : 1;
- //! source data is noncontiguous
- uint32_t is_noncont_src : 1;
- //! destination data is noncontiguous
- uint32_t is_noncont_dst : 1;
- };
- uint32_t bits;
- } flags;
+ varDescFlags flags;
//! Not used by compiler; set to 0
/*! Used by runtime as offset to base from data stored in a buffer */
int64_t offset;
@@ -472,4 +499,16 @@ struct FunctionDescriptor
// Pointer to OffloadDescriptor.
typedef struct OffloadDescriptor *OFFLOAD;
+// Used for setting the affinity of a stream.
+// Affinity policies that can be requested for a stream.
+enum affinity_type {
+ affinity_compact,
+ affinity_scatter
+};
+// Argument block describing the affinity to apply to a stream's pipeline.
+struct affinity_spec {
+ uint64_t sink_mask[16]; // CPU mask of the stream as 16 plain 64-bit words
+ int affinity_type; // one of the affinity_type enumerators
+ int num_cores; // number of cores
+ int num_threads; // number of threads
+};
+
#endif // OFFLOAD_COMMON_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_engine.cpp b/liboffloadmic/runtime/offload_engine.cpp
index 2fe0d24430c1..16b440d7ab61 100644
--- a/liboffloadmic/runtime/offload_engine.cpp
+++ b/liboffloadmic/runtime/offload_engine.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -37,6 +37,14 @@
#include "offload_host.h"
#include "offload_table.h"
+#include "offload_iterator.h"
+
+// Static members of the Stream class must be defined somewhere.
+// These members describe the list of all streams created in the program
+// via calls to _Offload_stream_create.
+uint64_t Stream::m_streams_count = 0;
+StreamMap Stream::all_streams;
+mutex_t Stream::m_stream_lock;
const char* Engine::m_func_names[Engine::c_funcs_total] =
{
@@ -47,7 +55,8 @@ const char* Engine::m_func_names[Engine::c_funcs_total] =
#endif // MYO_SUPPORT
"server_init",
"server_var_table_size",
- "server_var_table_copy"
+ "server_var_table_copy",
+ "server_set_stream_affinity"
};
// Symbolic representation of system signals. Fix for CQ233593
@@ -115,6 +124,7 @@ void Engine::init_process(void)
COIENGINE engine;
COIRESULT res;
const char **environ;
+ char buf[4096]; // For exe path name
// create environment for the target process
environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
@@ -127,39 +137,147 @@ void Engine::init_process(void)
// Create execution context in the specified device
OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
m_physical_index);
- res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
+ res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine);
check_result(res, c_get_engine_handle, m_index, res);
- // Target executable should be available by the time when we
- // attempt to initialize the device
+ // Get engine info on threads and cores.
+ // The values of core number and thread number will be used later at stream
+ // creation by call to _Offload_stream_create(device,number_of_cpus).
+
+ COI_ENGINE_INFO engine_info;
+
+ res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
+ check_result(res, c_get_engine_info, m_index, res);
+
+ // m_cpus bitset has 1 for available thread. At the begining all threads
+ // are available and m_cpus(i) is set to
+ // 1 for i = [0...engine_info.NumThreads].
+ m_cpus.reset();
+ for (int i = 0; i < engine_info.NumThreads; i++) {
+ m_cpus.set(i);
+ }
+
+ // The following values will be used at pipeline creation for streams
+ m_num_cores = engine_info.NumCores;
+ m_num_threads = engine_info.NumThreads;
+
+ // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
+ // Only the value 2 is supported in 16.0
+ if (mic_dma_channel_count == 2) {
+ if (COI::ProcessConfigureDMA) {
+ // Set DMA channels using COI API
+ COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE);
+ }
+ else {
+ // Set environment variable COI_DMA_CHANNEL_COUNT
+ // use putenv instead of setenv as Windows has no setenv.
+ // Note: putenv keeps the pointer it is given, so the string must
+ // stay valid and unmodified afterwards. A static array gives it
+ // that lifetime and is sized by the compiler to hold the whole
+ // string including its terminator.
+ static char env_var[] = "COI_DMA_CHANNEL_COUNT=2";
+ putenv(env_var);
+ }
+ }
+
+ // Target executable is not available then use compiler provided offload_main
if (__target_exe == 0) {
- LIBOFFLOAD_ERROR(c_no_target_exe);
- exit(1);
+ if (mic_device_main == 0) {
+ // There is no binary to start on the device. LIBOFFLOAD_ERROR only
+ // reports the problem, so stop here instead of passing a null
+ // name to the trace and to COI below.
+ LIBOFFLOAD_ERROR(c_report_no_host_exe);
+ exit(1);
+ }
+
+ OFFLOAD_DEBUG_TRACE(2,
+ "Loading target executable %s\n",mic_device_main);
+
+ res = COI::ProcessCreateFromFile(
+ engine, // in_Engine
+ mic_device_main, // in_pBinaryName
+ 0, // in_Argc
+ 0, // in_ppArgv
+ environ == 0, // in_DupEnv
+ environ, // in_ppAdditionalEnv
+ mic_proxy_io, // in_ProxyActive
+ mic_proxy_fs_root, // in_ProxyfsRoot
+ mic_buffer_size, // in_BufferSpace
+ mic_library_path, // in_LibrarySearchPath
+ &m_process // out_pProcess
+ );
}
+ else {
+ // Target executable should be available by the time when we
+ // attempt to initialize the device
- OFFLOAD_DEBUG_TRACE(2,
- "Loading target executable \"%s\" from %p, size %lld\n",
- __target_exe->name, __target_exe->data, __target_exe->size);
-
- res = COI::ProcessCreateFromMemory(
- engine, // in_Engine
- __target_exe->name, // in_pBinaryName
- __target_exe->data, // in_pBinaryBuffer
- __target_exe->size, // in_BinaryBufferLength,
- 0, // in_Argc
- 0, // in_ppArgv
- environ == 0, // in_DupEnv
- environ, // in_ppAdditionalEnv
- mic_proxy_io, // in_ProxyActive
- mic_proxy_fs_root, // in_ProxyfsRoot
- mic_buffer_size, // in_BufferSpace
- mic_library_path, // in_LibrarySearchPath
- __target_exe->origin, // in_FileOfOrigin
- __target_exe->offset, // in_FileOfOriginOffset
- &m_process // out_pProcess
- );
+ // Need the full path of the FAT exe for VTUNE
+ {
+#ifndef TARGET_WINNT
+ ssize_t len = readlink("/proc/self/exe", buf,1000);
+#else
+ int len = GetModuleFileName(NULL, buf,1000);
+#endif // TARGET_WINNT
+ // readlink reports failure as -1 while GetModuleFileName reports
+ // it as 0; an empty path is unusable in either case.
+ if (len <= 0) {
+ LIBOFFLOAD_ERROR(c_report_no_host_exe);
+ exit(1);
+ }
+ else if (len > 999) {
+ // the whole requested size was filled: path may be truncated
+ LIBOFFLOAD_ERROR(c_report_path_buff_overflow);
+ exit(1);
+ }
+ buf[len] = '\0';
+ }
+
+ OFFLOAD_DEBUG_TRACE(2,
+ "Loading target executable \"%s\" from %p, size %lld, host file %s\n",
+ __target_exe->name, __target_exe->data, __target_exe->size,
+ buf);
+
+ res = COI::ProcessCreateFromMemory(
+ engine, // in_Engine
+ __target_exe->name, // in_pBinaryName
+ __target_exe->data, // in_pBinaryBuffer
+ __target_exe->size, // in_BinaryBufferLength,
+ 0, // in_Argc
+ 0, // in_ppArgv
+ environ == 0, // in_DupEnv
+ environ, // in_ppAdditionalEnv
+ mic_proxy_io, // in_ProxyActive
+ mic_proxy_fs_root, // in_ProxyfsRoot
+ mic_buffer_size, // in_BufferSpace
+ mic_library_path, // in_LibrarySearchPath
+ buf, // in_FileOfOrigin
+ -1, // in_FileOfOriginOffset use -1 to indicate to
+ // COI that is is a FAT binary
+ &m_process // out_pProcess
+ );
+ }
check_result(res, c_process_create, m_index, res);
+ if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) {
+ // available only in MPSS 4.2 and greater
+ if (COI::ProcessSetCacheSize != 0 ) {
+ int flags;
+ // Need compiler to use MPSS 3.2 or greater to get these
+ // definition so currently hardcoding it
+ // COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC;
+ flags = 0x00020002;
+ res = COI::ProcessSetCacheSize(
+ m_process, // in_Process
+ mic_2m_buffer_size, // in_HugePagePoolSize
+ flags, // inHugeFlags
+ mic_4k_buffer_size, // in_SmallPagePoolSize
+ flags, // inSmallFlags
+ 0, // in_NumDependencies
+ 0, // in_pDependencies
+ 0 // out_PCompletion
+ );
+ OFFLOAD_DEBUG_TRACE(2,
+ "Reserve target buffers 4K pages = %d 2M pages = %d\n",
+ mic_4k_buffer_size, mic_2m_buffer_size);
+ check_result(res, c_process_set_cache_size, m_index, res);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2,
+ "Reserve target buffers not supported in current MPSS\n");
+ }
+ }
+
// get function handles
res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
m_func_names, m_funcs);
@@ -226,8 +344,9 @@ void Engine::load_libraries()
// load libraries collected so far
for (TargetImageList::iterator it = m_images.begin();
it != m_images.end(); it++) {
- OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
- it->name, it->data, it->size);
+ OFFLOAD_DEBUG_TRACE(2,
+ "Loading library \"%s\" from %p, size %llu, host file %s\n",
+ it->name, it->data, it->size, it->origin);
// load library to the device
COILIBRARY lib;
@@ -238,9 +357,10 @@ void Engine::load_libraries()
it->name,
mic_library_path,
it->origin,
- it->offset,
+ (it->origin) ? -1 : 0,
COI_LOADLIBRARY_V1_FLAGS,
&lib);
+ m_dyn_libs.push_front(DynLib(it->name, it->data, lib));
if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
check_result(res, c_load_library, m_index, res);
@@ -249,6 +369,27 @@ void Engine::load_libraries()
m_images.clear();
}
+// Unloads from the target process the library that was loaded from the
+// image at "data"; "name" is used only for the debug trace. Does nothing
+// when the process has not been started or no such library is recorded.
+void Engine::unload_library(const void *data, const char *name)
+{
+    if (m_process == 0) {
+        return;
+    }
+
+    // locate the first recorded library that came from this image
+    DynLibList::iterator entry = m_dyn_libs.begin();
+    while (entry != m_dyn_libs.end() && !(entry->data == data)) {
+        entry++;
+    }
+    if (entry == m_dyn_libs.end()) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(2,
+                        "Unloading library \"%s\"\n",name);
+    COIRESULT status = COI::ProcessUnloadLibrary(m_process,entry->lib);
+
+    // the record is dropped whether or not the unload succeeded
+    m_dyn_libs.erase(entry);
+    if (status != COI_SUCCESS) {
+        check_result(status, c_unload_library, m_index, status);
+    }
+}
+
static bool target_entry_cmp(
const VarList::BufEntry &l,
const VarList::BufEntry &r
@@ -273,8 +414,9 @@ void Engine::init_ptr_data(void)
COIEVENT event;
// Prepare table of host entries
- std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
- __offload_vars.end());
+ std::vector<const VarTable::Entry*> host_table(
+ Iterator(__offload_vars.get_head()),
+ Iterator());
// no need to do anything further is host table is empty
if (host_table.size() <= 0) {
@@ -348,17 +490,16 @@ void Engine::init_ptr_data(void)
while (hi != he && ti != te) {
int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
if (res == 0) {
+ bool is_new;
// add matching entry to var map
- std::pair<PtrSet::iterator, bool> res =
- m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
+ PtrData *ptr = insert_ptr_data((*hi)->addr, (*hi)->size, is_new);
// store address for new entries
- if (res.second) {
- PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
+ if (is_new) {
ptr->mic_addr = ti->addr;
ptr->is_static = true;
+ // PtrDataTable::insert_ptr_data() takes alloc_ptr_data_lock only
+ // for entries it has just created, so release it only here.
+ ptr->alloc_ptr_data_lock.unlock();
}
-
hi++;
ti++;
}
@@ -379,6 +520,7 @@ void Engine::init_ptr_data(void)
}
COIRESULT Engine::compute(
+ _Offload_stream stream,
const std::list<COIBUFFER> &buffers,
const void* data,
uint16_t data_size,
@@ -413,9 +555,11 @@ COIRESULT Engine::compute(
bufs = 0;
flags = 0;
}
-
+ COIPIPELINE pipeline = (stream == no_stream) ?
+ get_pipeline() :
+ get_pipeline(stream);
// start computation
- res = COI::PipelineRunFunction(get_pipeline(),
+ res = COI::PipelineRunFunction(pipeline,
m_funcs[c_func_compute],
num_bufs, bufs, flags,
num_deps, deps,
@@ -528,12 +672,214 @@ COIPIPELINE Engine::get_pipeline(void)
// create pipeline for this thread
res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
check_result(res, c_pipeline_create, m_index, res);
-
thread->set_pipeline(m_index, pipeline);
}
return pipeline;
}
+// Looks up the stream registered under "handle" in all_streams while
+// holding m_stream_lock. When "remove" is true a found entry is also erased
+// from the map (the Stream object itself is not deleted here).
+// Returns 0 when the handle is unknown.
+Stream* Stream::find_stream(uint64_t handle, bool remove)
+{
+    Stream *found = 0;
+
+    m_stream_lock.lock();
+    StreamMap::iterator pos = all_streams.find(handle);
+    const bool present = (pos != all_streams.end());
+    if (present) {
+        found = pos->second;
+    }
+    if (present && remove) {
+        all_streams.erase(pos);
+    }
+    m_stream_lock.unlock();
+
+    return found;
+}
+
+// Returns the COI pipeline that serves the stream identified by "handle",
+// creating it on first use.
+//
+// Creation reserves the number of device threads requested by the stream
+// from m_cpus (thread 0 is never handed out), creates a pipeline restricted
+// to that CPU mask, then runs the c_func_set_stream_affinity entry on the
+// new pipeline and waits for it to finish. An error is reported and the
+// program aborted when the handle is unknown, the pipeline limit is exceeded
+// or there are not enough free device threads.
+COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
+{
+    Stream * stream = Stream::find_stream(handle, false);
+
+    if (!stream) {
+        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
+        LIBOFFLOAD_ABORT;
+    }
+
+    COIPIPELINE pipeline = stream->get_pipeline();
+
+    if (pipeline == 0) {
+        COIRESULT res;
+        int proc_num;
+        COI_CPU_MASK in_Mask ;
+
+#ifndef TARGET_WINNT
+        proc_num = __sync_fetch_and_add(&m_proc_number, 1);
+#else // TARGET_WINNT
+        proc_num = _InterlockedIncrement(&m_proc_number);
+#endif // TARGET_WINNT
+
+        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
+            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
+            LIBOFFLOAD_ABORT;
+        }
+
+        m_stream_lock.lock();
+
+        // start process if not done yet
+        if (m_process == 0) {
+            init_process();
+        }
+
+        // create CPUmask
+        res = COI::PipelineClearCPUMask(in_Mask);
+        // the c_clear_cpu_mask message takes only the error code, like
+        // c_set_cpu_mask below
+        check_result(res, c_clear_cpu_mask, res);
+
+        int stream_cpu_num = stream->get_cpu_number();
+
+        stream->m_stream_cpus.reset();
+
+        int threads_per_core = m_num_threads / m_num_cores;
+
+        // The "stream_cpu_num" available threads is set in mask.
+        // Available threads are defined by examining of m_cpus bitset.
+        // We skip thread 0 .
+        for (int i = 1; i < m_num_threads; i++) {
+            // for available thread i m_cpus[i] is equal to 1
+            if (m_cpus[i]) {
+                res = COI::PipelineSetCPUMask(m_process,
+                                              i / threads_per_core,
+                                              i % threads_per_core,
+                                              in_Mask);
+
+                check_result(res, c_set_cpu_mask, res);
+                // mark thread i as nonavailable
+                m_cpus.set(i,0);
+                // Mark thread i as given for the stream.
+                // In case of stream destroying by call to
+                // _Offload_stream_destroy we can mark the thread i as
+                // available.
+                stream->m_stream_cpus.set(i);
+                if (--stream_cpu_num <= 0) {
+                    break;
+                }
+            }
+        }
+
+        // if stream_cpu_num is greater than 0 there are not enough
+        // available threads
+        if (stream_cpu_num > 0) {
+            LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
+            LIBOFFLOAD_ABORT;
+        }
+        // create pipeline for this stream
+        OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
+            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
+            "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
+            in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
+            in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
+            in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
+            in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
+        res = COI::PipelineCreate(m_process, in_Mask,
+                                  mic_stack_size, &pipeline);
+        check_result(res, c_pipeline_create, m_index, res);
+
+        // Set stream's affinities
+        {
+            struct affinity_spec affinity_spec;
+            char* affinity_type;
+            int i;
+
+            // "compact" by default
+            affinity_spec.affinity_type = affinity_compact;
+
+            // Check if user has specified type of affinity
+            if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) !=
+                NULL)
+            {
+                char affinity_str[16];
+                int affinity_str_len;
+
+                OFFLOAD_DEBUG_TRACE(2,
+                    "User has specified OFFLOAD_STREAM_AFFINITY=%s\n",
+                    affinity_type);
+
+                // Set type of affinity requested
+                // (lower-cased copy, truncated to fit affinity_str)
+                affinity_str_len = strlen(affinity_type);
+                for (i=0; i<affinity_str_len && i<15; i++)
+                {
+                    // tolower requires a value representable as
+                    // unsigned char, so do not pass a plain char
+                    affinity_str[i] = tolower(
+                        static_cast<unsigned char>(affinity_type[i]));
+                }
+                affinity_str[i] = '\0';
+                if (strcmp(affinity_str, "compact") == 0) {
+                    affinity_spec.affinity_type = affinity_compact;
+                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
+                } else if (strcmp(affinity_str, "scatter") == 0) {
+                    affinity_spec.affinity_type = affinity_scatter;
+                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n");
+                } else {
+                    // unknown value: report it and fall back to compact
+                    LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str);
+                    affinity_spec.affinity_type = affinity_compact;
+                    OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n");
+                }
+            }
+            // Make flat copy of sink mask because COI's mask is opaque
+            for (i=0; i<16; i++) {
+                affinity_spec.sink_mask[i] = in_Mask[i];
+            }
+            // Set number of cores and threads
+            affinity_spec.num_cores = m_num_cores;
+            affinity_spec.num_threads = m_num_threads;
+
+            COIEVENT event;
+            res = COI::PipelineRunFunction(pipeline,
+                                           m_funcs[c_func_set_stream_affinity],
+                                           0, 0, 0,
+                                           0, 0,
+                                           &affinity_spec, sizeof(affinity_spec),
+                                           0, 0,
+                                           &event);
+            check_result(res, c_pipeline_run_func, m_index, res);
+
+            res = COI::EventWait(1, &event, -1, 1, 0, 0);
+            check_result(res, c_event_wait, res);
+        }
+
+        m_stream_lock.unlock();
+        stream->set_pipeline(pipeline);
+    }
+    return pipeline;
+}
+
+// Destroys the stream identified by "handle": removes it from the stream
+// map, marks every device thread recorded in the stream's m_stream_cpus as
+// available again in m_cpus and deletes the Stream object. An unknown
+// handle is reported as an error and aborts.
+void Engine::stream_destroy(_Offload_stream handle)
+{
+    // look the stream up and take it out of the map
+    Stream * stream = Stream::find_stream(handle, true);
+
+    if (!stream) {
+        LIBOFFLOAD_ERROR(c_offload_no_stream, m_index);
+        LIBOFFLOAD_ABORT;
+        return;
+    }
+
+    // return cpus for future use
+    int cpu = 0;
+    while (cpu < m_num_threads) {
+        if (stream->m_stream_cpus.test(cpu)) {
+            m_cpus.set(cpu);
+        }
+        cpu++;
+    }
+    delete stream;
+}
+
+// Returns an identifier for the calling thread: the address of its Thread
+// object stored under mic_thread_key, which is created and stored on the
+// first call made from that thread.
+uint64_t Engine::get_thread_id(void)
+{
+    Thread* self = (Thread*) thread_getspecific(mic_thread_key);
+
+    if (!self) {
+        // first use on this thread: create and remember its Thread object
+        self = new Thread(&m_proc_number);
+        thread_setspecific(mic_thread_key, self);
+    }
+
+    const uint64_t id = reinterpret_cast<uint64_t>(self);
+    return id;
+}
+
AutoSet& Engine::get_auto_vars(void)
{
Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
diff --git a/liboffloadmic/runtime/offload_engine.h b/liboffloadmic/runtime/offload_engine.h
index 501890c58342..abd5cc82f305 100644
--- a/liboffloadmic/runtime/offload_engine.h
+++ b/liboffloadmic/runtime/offload_engine.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -32,13 +32,16 @@
#define OFFLOAD_ENGINE_H_INCLUDED
#include <limits.h>
-
+#include <bitset>
#include <list>
#include <set>
#include <map>
#include "offload_common.h"
#include "coi/coi_client.h"
+#define SIGNAL_IS_REMOVED ((OffloadDescriptor *)-1)
+const int64_t no_stream = -1;
+
// Address range
class MemRange {
public:
@@ -157,6 +160,50 @@ private:
typedef std::list<PtrData*> PtrDataList;
+class PtrDataTable {
+public:
+ typedef std::set<PtrData> PtrSet;
+
+ PtrData* find_ptr_data(const void *ptr) {
+ m_ptr_lock.lock();
+ PtrSet::iterator res = list.find(PtrData(ptr, 0));
+
+ m_ptr_lock.unlock();
+ if (res == list.end()) {
+ return 0;
+ }
+ return const_cast<PtrData*>(res.operator->());
+ }
+
+ PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
+ m_ptr_lock.lock();
+ std::pair<PtrSet::iterator, bool> res =
+ list.insert(PtrData(ptr, len));
+
+ PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
+ m_ptr_lock.unlock();
+
+ is_new = res.second;
+ if (is_new) {
+ // It's necessary to lock as soon as possible.
+ // unlock must be done at call site of insert_ptr_data at
+ // branch for is_new
+ ptr_data->alloc_ptr_data_lock.lock();
+ }
+ return ptr_data;
+ }
+
+ void remove_ptr_data(const void *ptr) {
+ m_ptr_lock.lock();
+ list.erase(PtrData(ptr, 0));
+ m_ptr_lock.unlock();
+ }
+private:
+
+ PtrSet list;
+ mutex_t m_ptr_lock;
+};
+
// Data associated with automatic variable
class AutoData {
public:
@@ -186,7 +233,15 @@ public:
return _InterlockedDecrement(&ref_count);
#endif // TARGET_WINNT
}
-
+
+ long nullify_reference() {
+#ifndef TARGET_WINNT
+ return __sync_lock_test_and_set(&ref_count, 0);
+#else // TARGET_WINNT
+ return _InterlockedExchange(&ref_count,0);
+#endif // TARGET_WINNT
+ }
+
long get_reference() const {
return ref_count;
}
@@ -226,18 +281,39 @@ struct TargetImage
typedef std::list<TargetImage> TargetImageList;
+// dynamic library and Image associated with lib
+struct DynLib
+{
+ DynLib(const char *_name, const void *_data,
+ COILIBRARY _lib) :
+ name(_name), data(_data), lib(_lib)
+ {}
+ // library name
+ const char* name;
+
+ // contents
+ const void* data;
+
+ COILIBRARY lib;
+};
+typedef std::list<DynLib> DynLibList;
+
// Data associated with persistent auto objects
struct PersistData
{
- PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
- stack_cpu_addr(addr), routine_id(routine_num)
+ PersistData(const void *addr, uint64_t routine_num,
+ uint64_t size, uint64_t thread) :
+ stack_cpu_addr(addr), routine_id(routine_num), thread_id(thread)
{
stack_ptr_data = new PtrData(0, size);
}
- // 1-st key value - begining of the stack at CPU
+ // 1-st key value - beginning of the stack at CPU
const void * stack_cpu_addr;
// 2-nd key value - identifier of routine invocation at CPU
uint64_t routine_id;
+ // 3-rd key value - thread identifier
+ uint64_t thread_id;
+
// corresponded PtrData; only stack_ptr_data->mic_buf is used
PtrData * stack_ptr_data;
// used to get offset of the variable in stack buffer
@@ -246,6 +322,75 @@ struct PersistData
typedef std::list<PersistData> PersistDataList;
+// Data associated with stream
+struct Stream
+{
+ Stream(int device, int num_of_cpus) :
+ m_number_of_cpus(num_of_cpus), m_pipeline(0), m_last_offload(0),
+ m_device(device)
+ {}
+ ~Stream() {
+ if (m_pipeline) {
+ COI::PipelineDestroy(m_pipeline);
+ }
+ }
+
+ COIPIPELINE get_pipeline(void) {
+ return(m_pipeline);
+ }
+
+ int get_device(void) {
+ return(m_device);
+ }
+
+ int get_cpu_number(void) {
+ return(m_number_of_cpus);
+ }
+
+ void set_pipeline(COIPIPELINE pipeline) {
+ m_pipeline = pipeline;
+ }
+
+ OffloadDescriptor* get_last_offload(void) {
+ return(m_last_offload);
+ }
+
+ void set_last_offload(OffloadDescriptor* last_offload) {
+ m_last_offload = last_offload;
+ }
+
+ static Stream* find_stream(uint64_t handle, bool remove);
+
+ static _Offload_stream add_stream(int device, int number_of_cpus) {
+ m_stream_lock.lock();
+ all_streams[++m_streams_count] = new Stream(device, number_of_cpus);
+ m_stream_lock.unlock();
+ return(m_streams_count);
+ }
+
+ typedef std::map<uint64_t, Stream*> StreamMap;
+
+ static uint64_t m_streams_count;
+ static StreamMap all_streams;
+ static mutex_t m_stream_lock;
+
+ int m_device;
+
+ // number of cpus
+ int m_number_of_cpus;
+
+ // The pipeline associated with the stream
+ COIPIPELINE m_pipeline;
+
+ // The last offload occurred via the stream
+ OffloadDescriptor* m_last_offload;
+
+ // Cpus used by the stream
+ std::bitset<COI_MAX_HW_THREADS> m_stream_cpus;
+};
+
+typedef std::map<uint64_t, Stream*> StreamMap;
+
// class representing a single engine
struct Engine {
friend void __offload_init_library_once(void);
@@ -275,9 +420,14 @@ struct Engine {
return m_process;
}
+ uint64_t get_thread_id(void);
+
// initialize device
void init(void);
+ // unload library
+ void unload_library(const void *data, const char *name);
+
// add new library
void add_lib(const TargetImage &lib)
{
@@ -288,6 +438,7 @@ struct Engine {
}
COIRESULT compute(
+ _Offload_stream stream,
const std::list<COIBUFFER> &buffers,
const void* data,
uint16_t data_size,
@@ -323,36 +474,28 @@ struct Engine {
// Memory association table
//
PtrData* find_ptr_data(const void *ptr) {
- m_ptr_lock.lock();
- PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
- m_ptr_lock.unlock();
- if (res == m_ptr_set.end()) {
- return 0;
- }
- return const_cast<PtrData*>(res.operator->());
+ return m_ptr_set.find_ptr_data(ptr);
+ }
+
+ PtrData* find_targetptr_data(const void *ptr) {
+ return m_targetptr_set.find_ptr_data(ptr);
}
PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
- m_ptr_lock.lock();
- std::pair<PtrSet::iterator, bool> res =
- m_ptr_set.insert(PtrData(ptr, len));
- PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
- m_ptr_lock.unlock();
+ return m_ptr_set.insert_ptr_data(ptr, len, is_new);
+ }
- is_new = res.second;
- if (is_new) {
- // It's necessary to lock as soon as possible.
- // unlock must be done at call site of insert_ptr_data at
- // branch for is_new
- ptr_data->alloc_ptr_data_lock.lock();
- }
- return ptr_data;
+ PtrData* insert_targetptr_data(const void *ptr, uint64_t len,
+ bool &is_new) {
+ return m_targetptr_set.insert_ptr_data(ptr, len, is_new);
}
void remove_ptr_data(const void *ptr) {
- m_ptr_lock.lock();
- m_ptr_set.erase(PtrData(ptr, 0));
- m_ptr_lock.unlock();
+ m_ptr_set.remove_ptr_data(ptr);
+ }
+
+ void remove_targetptr_data(const void *ptr) {
+ m_targetptr_set.remove_ptr_data(ptr);
}
//
@@ -396,7 +539,7 @@ struct Engine {
if (it != m_signal_map.end()) {
desc = it->second;
if (remove) {
- m_signal_map.erase(it);
+ it->second = SIGNAL_IS_REMOVED;
}
}
}
@@ -405,6 +548,14 @@ struct Engine {
return desc;
}
+ void stream_destroy(_Offload_stream handle);
+
+ COIPIPELINE get_pipeline(_Offload_stream stream);
+
+ StreamMap get_stream_map() {
+ return m_stream_map;
+ }
+
// stop device process
void fini_process(bool verbose);
@@ -417,6 +568,11 @@ private:
{}
~Engine() {
+ for (StreamMap::iterator it = m_stream_map.begin();
+ it != m_stream_map.end(); it++) {
+ Stream * stream = it->second;
+ delete stream;
+ }
if (m_process != 0) {
fini_process(false);
}
@@ -469,14 +625,24 @@ private:
// List of libraries to be loaded
TargetImageList m_images;
- // var table
- PtrSet m_ptr_set;
- mutex_t m_ptr_lock;
+ // var tables
+ PtrDataTable m_ptr_set;
+ PtrDataTable m_targetptr_set;
// signals
SignalMap m_signal_map;
mutex_t m_signal_lock;
+ // streams
+ StreamMap m_stream_map;
+ mutex_t m_stream_lock;
+ int m_num_cores;
+ int m_num_threads;
+ std::bitset<COI_MAX_HW_THREADS> m_cpus;
+
+ // List of dynamic libraries to be registered
+ DynLibList m_dyn_libs;
+
// constants for accessing device function handles
enum {
c_func_compute = 0,
@@ -487,6 +653,7 @@ private:
c_func_init,
c_func_var_table_size,
c_func_var_table_copy,
+ c_func_set_stream_affinity,
c_funcs_total
};
static const char* m_func_names[c_funcs_total];
diff --git a/liboffloadmic/runtime/offload_env.cpp b/liboffloadmic/runtime/offload_env.cpp
index 447c6edf74e4..79f5f3667547 100644
--- a/liboffloadmic/runtime/offload_env.cpp
+++ b/liboffloadmic/runtime/offload_env.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -146,7 +146,7 @@ void MicEnvVar::add_env_var(
else {
card = get_card(card_number);
if (!card) {
- // definition for new card occured
+ // definition for new card occurred
card = new CardEnvVars(card_number);
card_spec_list.push_back(card);
}
@@ -321,7 +321,7 @@ void MicEnvVar::mic_parse_env_var_list(
// Collect all definitions for the card with number "card_num".
// The returned result is vector of string pointers defining one
// environment variable. The vector is terminated by NULL pointer.
-// In the begining of the vector there are env vars defined as
+// In the beginning of the vector there are env vars defined as
// <mic-prefix>_<card-number>_<var>=<value>
// or
// <mic-prefix>_<card-number>_ENV=<env-vars>
diff --git a/liboffloadmic/runtime/offload_env.h b/liboffloadmic/runtime/offload_env.h
index e60e8601e9ca..01138c2d4c39 100644
--- a/liboffloadmic/runtime/offload_env.h
+++ b/liboffloadmic/runtime/offload_env.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
#define OFFLOAD_ENV_H_INCLUDED
#include <list>
+#include "offload_util.h"
// data structure and routines to parse MIC user environment and pass to MIC
@@ -43,7 +44,7 @@ enum MicEnvVarKind
c_mic_card_env // for <mic-prefix>_<card-number>_ENV
};
-struct MicEnvVar {
+struct DLL_LOCAL MicEnvVar {
public:
MicEnvVar() : prefix(0) {}
~MicEnvVar();
diff --git a/liboffloadmic/runtime/offload_host.cpp b/liboffloadmic/runtime/offload_host.cpp
index 23a873f38860..08f626f457e5 100644
--- a/liboffloadmic/runtime/offload_host.cpp
+++ b/liboffloadmic/runtime/offload_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,7 +28,8 @@
*/
-// Forward declaration as the following 2 functions are declared as friend in offload_engine.h
+// Forward declaration as the following 2 functions are declared as friend
+// in offload_engine.h.
// CLANG does not like static to been after friend declaration.
static void __offload_init_library_once(void);
static void __offload_fini_library(void);
@@ -63,6 +64,55 @@ static void __offload_fini_library(void);
#define GET_OFFLOAD_NUMBER(timer_data) \
timer_data? timer_data->offload_number : 0
+extern "C" {
+#ifdef TARGET_WINNT
+// Windows does not support imports from libraries without actually
+// including them as a dependency. We don't want to include it as a
+// dependency since it is used only for Fortran when traceback is enabled.
+// We chose to implement it with GetProcAddress.
+#define FORTRAN_TRACE_BACK win_for__continue_traceback
+int win_for__continue_traceback( _Offload_result coi_offload_result )
+{
+ HINSTANCE hDLL;
+ int (* TraceBackRoutine)(_Offload_result value);
+
+ hDLL = LoadLibrary("libifcoremd.dll");
+ if (hDLL != 0) {
+ TraceBackRoutine = (int (*)(_Offload_result)) GetProcAddress(hDLL,
+ "for__continue_traceback");
+ if (TraceBackRoutine != 0) {
+ return TraceBackRoutine(coi_offload_result);
+ }
+ else {
+ OFFLOAD_TRACE(3,
+ "Cannot find for__continue_traceback routine in libifcorert.dll\n");
+ exit(1);
+ }
+ }
+ else {
+ OFFLOAD_TRACE(3, "Cannot load libifcorert.dll\n");
+ exit(1);
+ }
+ return 0;
+}
+
+#else // TARGET_WINNT
+
+#define FORTRAN_TRACE_BACK for__continue_traceback
+
+// for__continue_traceback is provided as a dummy to resolve link time symbols
+// for C/C++ programs. For Fortran the actual fortran library function in
+// libifcore.so is used.
+#pragma weak for__continue_traceback
+int for__continue_traceback( _Offload_result coi_offload_result )
+{
+ OFFLOAD_TRACE(3,
+ "liboffload function for_continue_traceback should not be called.\n");
+ exit(1);
+}
+#endif //TARGET_WINNT
+} // extern "C"
+
#ifdef TARGET_WINNT
// Small subset of ELF declarations for Windows which is needed to compile
// this file. ELF header is used to understand what binary type is contained
@@ -104,7 +154,16 @@ int offload_number = 0;
static const char *htrace_envname = "H_TRACE";
static const char *offload_report_envname = "OFFLOAD_REPORT";
-static char *timer_envname = "H_TIME";
+static const char *timer_envname = "H_TIME";
+
+// location of offload_main executable
+// To be used if the main application has no offload and is not built
+// with -offload but dynamic library linked in has offload pragma
+char* mic_device_main = 0;
+
+// DMA channel count used by COI and set via
+// OFFLOAD_DMA_CHANNEL_COUNT environment variable
+uint32_t mic_dma_channel_count;
// Trace information
static const char* vardesc_direction_as_string[] = {
@@ -146,6 +205,13 @@ uint32_t mic_stack_size = 12 * 1024 * 1024;
// MIC_BUFFERSIZE
uint64_t mic_buffer_size = 0;
+// Preallocated 4K page memory size for buffers on MIC
+uint64_t mic_4k_buffer_size = 0;
+
+// Preallocated 2M page memory size for buffers on MIC
+uint64_t mic_2m_buffer_size = 0;
+
+
// MIC_LD_LIBRARY_PATH
char* mic_library_path = 0;
@@ -183,6 +249,15 @@ static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
+//OFFLOAD_PARALLEL_COPY
+static bool __offload_parallel_copy = false;
+static const char *parallel_copy_envname = "OFFLOAD_PARALLEL_COPY";
+
+//Use COI interface for noncontiguous transfer if it exists.
+static bool __offload_use_coi_noncontiguous_transfer = false;
+static const char *use_coi_noncontiguous_transfer_envname =
+ "MIC_USE_COI_MULTI_D";
+
// The list of pending target libraries
static bool __target_libs;
static TargetImageList __target_libs_list;
@@ -192,6 +267,112 @@ static mutex_t stack_alloc_lock;
// Target executable
TargetImage* __target_exe;
+// Print readable offload flags
+static void trace_offload_flags(
+ OffloadHostTimerData* timer_data,
+ OffloadFlags offload_flags
+)
+{
+ // Sized big enough for all flag names
+ char fbuffer[256];
+ bool first = true;
+ if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
+ sprintf(fbuffer, " OffloadFlags=(");
+ if (offload_flags.bits.fortran_traceback) {
+ sprintf(fbuffer+strlen(fbuffer), "fortran_traceback");
+ first = false;
+ }
+ if (offload_flags.bits.omp_async) {
+ sprintf(fbuffer+strlen(fbuffer), first ? "omp_async" : ",omp_async");
+ first = false;
+ }
+ OFFLOAD_DEBUG_TRACE_1(1,
+ GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func,
+ "%s)\n", fbuffer);
+ }
+}
+
+// Print readable varDesc flags
+static void trace_varDesc_flags(
+ OffloadHostTimerData* timer_data,
+ varDescFlags offload_flags
+)
+{
+ // Sized big enough for all flag names
+ char fbuffer[256];
+ bool first = true;
+ if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
+ sprintf(fbuffer, " varDescFlags=(");
+ if (offload_flags.is_static) {
+ sprintf(fbuffer+strlen(fbuffer), "is_static");
+ first = false;
+ }
+ if (offload_flags.is_static_dstn) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_static_dstn" : ",is_static_dstn");
+ first = false;
+ }
+ if (offload_flags.has_length) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "has_length" : ",has_length");
+ first = false;
+ }
+ if (offload_flags.is_stack_buf) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_stack_buf" : ",is_stack_buf");
+ first = false;
+ }
+ if (offload_flags.targetptr) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "targetptr" : ",targetptr");
+ first = false;
+ }
+ if (offload_flags.preallocated) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "preallocated" : ",preallocated");
+ first = false;
+ }
+ if (offload_flags.is_pointer) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_pointer" : ",is_pointer");
+ first = false;
+ }
+ if (offload_flags.sink_addr) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "sink_addr" : ",sink_addr");
+ first = false;
+ }
+ if (offload_flags.alloc_disp) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "alloc_disp" : ",alloc_disp");
+ first = false;
+ }
+ if (offload_flags.is_noncont_src) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_noncont_src" : ",is_noncont_src");
+ first = false;
+ }
+ if (offload_flags.is_noncont_dst) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_noncont_dst" : ",is_noncont_dst");
+ first = false;
+ }
+ if (offload_flags.always_copy) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "always_copy" : ",always_copy");
+ first = false;
+ }
+ if (offload_flags.always_delete) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "always_delete" : ",always_delete");
+ first = false;
+ }
+ OFFLOAD_DEBUG_TRACE_1(1,
+ GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func,
+ "%s)\n", fbuffer);
+ }
+}
+
static char * offload_get_src_base(void * ptr, uint8_t type)
{
char *base;
@@ -204,7 +385,7 @@ static char * offload_get_src_base(void * ptr, uint8_t type)
else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
ArrDesc *dvp;
if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
- const arr_desc *ap = static_cast<const arr_desc*>(ptr);
+ const Arr_Desc *ap = static_cast<const Arr_Desc*>(ptr);
dvp = (type == c_dv_data_slice) ?
reinterpret_cast<ArrDesc*>(ap->base) :
*reinterpret_cast<ArrDesc**>(ap->base);
@@ -278,130 +459,228 @@ _Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
}
}
+// is_targptr == 0 && is_prealloc == 0 - allocation of pointer data;
+// is_targptr == 1 && is_prealloc == 0 - allocation of target memory:
+// allocate memory at target; use its value as base in target table.
+// is_targptr == 1 && is_prealloc == 1 - use preallocated target memory:
+// base is the address at target of the preallocated memory; use its
+// value as base in target table.
+
bool OffloadDescriptor::alloc_ptr_data(
PtrData* &ptr_data,
void *base,
int64_t disp,
int64_t size,
int64_t alloc_disp,
- int align
+ int align,
+ bool is_targptr,
+ bool is_prealloc,
+ bool pin
)
{
// total length of base
- int64_t length = disp + size;
+ int64_t length = size;
bool is_new;
+ COIBUFFER targptr_buf;
+ COIRESULT res;
+ uint32_t buffer_flags = 0;
+ char * base_disp = reinterpret_cast<char *>(base) + disp;
- OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
- base, length);
+ // create buffer with large pages if data length exceeds
+ // large page threshold
+ if (length >= __offload_use_2mb_buffers) {
+ buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+ }
+ // Allocate memory at target for targetptr without preallocated as we need
+ // its address as base argument in call to m_device.insert_ptr_data
+ if (is_targptr && !is_prealloc) {
+ length = alloc_disp ? length : size + disp;
+ res = COI::BufferCreate(
+ length,
+ COI_BUFFER_NORMAL,
+ buffer_flags,
+ 0,
+ 1,
+ &m_device.get_process(),
+ &targptr_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_create, res);
+ }
+ return false;
+ }
+
+ res = COI::BufferGetSinkAddress(
+ targptr_buf, reinterpret_cast<uint64_t *>(&base));
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_get_address, res);
+ }
+ return false;
+ }
+ }
+ OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
+ alloc_disp ? base : base_disp,
+ alloc_disp ? length : size + disp);
+
// add new entry
- ptr_data = m_device.insert_ptr_data(base, length, is_new);
+
+ ptr_data = is_targptr ?
+ m_device.find_targetptr_data(base_disp) :
+ m_device.find_ptr_data(base_disp);
+ // if ptr_data is found just need to check it for overlapping
+ if (ptr_data) {
+ is_new = false;
+ base = base_disp;
+ }
+ else {
+ // If association is not found we must create it.
+ length = alloc_disp ? length : size + disp;
+ ptr_data = is_targptr ?
+ m_device.insert_targetptr_data(base, length, is_new) :
+ m_device.insert_ptr_data(base, length, is_new);
+ }
if (is_new) {
OFFLOAD_TRACE(3, "Added new association\n");
if (length > 0) {
OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
- COIRESULT res;
// align should be a power of 2
- if (align > 0 && (align & (align - 1)) == 0) {
+ if (!pin && !is_targptr &&
+ align > 0 && (align & (align - 1)) == 0) {
// offset within mic_buffer. Can do offset optimization
// only when source address alignment satisfies requested
// alignment on the target (cq172736).
if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
- ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
+ ptr_data->mic_offset =
+ reinterpret_cast<intptr_t>(base) & 4095;
}
}
// buffer size and flags
uint64_t buffer_size = length + ptr_data->mic_offset;
- uint32_t buffer_flags = 0;
- // create buffer with large pages if data length exceeds
- // large page threshold
- if (length >= __offload_use_2mb_buffers) {
- buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
- }
-
- // create CPU buffer
- OFFLOAD_DEBUG_TRACE_1(3,
+ // For targetptr there is no CPU buffer
+ if (pin || !is_targptr) {
+ // create CPU buffer
+ OFFLOAD_DEBUG_TRACE_1(3,
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_create_buf_host,
"Creating buffer from source memory %p, "
"length %lld\n", base, length);
- // result is not checked because we can continue without cpu
- // buffer. In this case we will use COIBufferRead/Write instead
- // of COIBufferCopy.
- COI::BufferCreateFromMemory(length,
+ // result is not checked because we can continue without cpu
+ // buffer. In this case we will use COIBufferRead/Write
+ // instead of COIBufferCopy.
+
+ COI::BufferCreateFromMemory(length,
COI_BUFFER_NORMAL,
0,
base,
1,
&m_device.get_process(),
&ptr_data->cpu_buf);
+ }
- OFFLOAD_DEBUG_TRACE_1(3,
+ // create MIC buffer
+ if (is_prealloc) {
+ OFFLOAD_DEBUG_TRACE_1(3,
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_create_buf_mic,
- "Creating buffer for sink: size %lld, offset %d, "
- "flags =0x%x\n", buffer_size - alloc_disp,
+ "Creating buffer from sink memory: size %lld, offset %d, "
+ "flags =0x%x\n", buffer_size,
ptr_data->mic_offset, buffer_flags);
-
- // create MIC buffer
- res = COI::BufferCreate(buffer_size - alloc_disp,
- COI_BUFFER_NORMAL,
- buffer_flags,
- 0,
- 1,
- &m_device.get_process(),
- &ptr_data->mic_buf);
- if (res != COI_SUCCESS) {
- if (m_status != 0) {
- m_status->result = translate_coi_error(res);
- }
- else if (m_is_mandatory) {
- report_coi_error(c_buf_create, res);
+ res = COI::BufferCreateFromMemory(ptr_data->cpu_addr.length(),
+ COI_BUFFER_NORMAL,
+ COI_SINK_MEMORY,
+ base,
+ 1,
+ &m_device.get_process(),
+ &ptr_data->mic_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_create, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
}
- ptr_data->alloc_ptr_data_lock.unlock();
- return false;
}
-
- // make buffer valid on the device.
- res = COI::BufferSetState(ptr_data->mic_buf,
- m_device.get_process(),
- COI_BUFFER_VALID,
- COI_BUFFER_NO_MOVE,
- 0, 0, 0);
- if (res != COI_SUCCESS) {
- if (m_status != 0) {
- m_status->result = translate_coi_error(res);
- }
- else if (m_is_mandatory) {
- report_coi_error(c_buf_set_state, res);
+ else if (is_targptr) {
+ ptr_data->mic_buf = targptr_buf;
+ }
+ else if (!pin) {
+ OFFLOAD_DEBUG_TRACE_1(3,
+ GET_OFFLOAD_NUMBER(get_timer_data()),
+ c_offload_create_buf_mic,
+ "Creating buffer for sink: size %lld, offset %d, "
+ "flags =0x%x\n", buffer_size,
+ ptr_data->mic_offset, buffer_flags);
+ res = COI::BufferCreate(buffer_size,
+ COI_BUFFER_NORMAL,
+ buffer_flags,
+ 0,
+ 1,
+ &m_device.get_process(),
+ &ptr_data->mic_buf);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_create, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
}
- ptr_data->alloc_ptr_data_lock.unlock();
- return false;
}
- res = COI::BufferSetState(ptr_data->mic_buf,
- COI_PROCESS_SOURCE,
- COI_BUFFER_INVALID,
- COI_BUFFER_NO_MOVE,
- 0, 0, 0);
- if (res != COI_SUCCESS) {
- if (m_status != 0) {
- m_status->result = translate_coi_error(res);
+ if (!pin) {
+ // make buffer valid on the device.
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ m_device.get_process(),
+ COI_BUFFER_VALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
}
- else if (m_is_mandatory) {
- report_coi_error(c_buf_set_state, res);
+
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ COI_PROCESS_SOURCE,
+ COI_BUFFER_INVALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ }
+ else if (m_is_mandatory) {
+ report_coi_error(c_buf_set_state, res);
+ }
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return false;
}
- ptr_data->alloc_ptr_data_lock.unlock();
- return false;
}
}
-
ptr_data->alloc_disp = alloc_disp;
ptr_data->alloc_ptr_data_lock.unlock();
}
@@ -415,9 +694,11 @@ bool OffloadDescriptor::alloc_ptr_data(
// This is not a new entry. Make sure that provided address range fits
// into existing one.
- MemRange addr_range(base, length - ptr_data->alloc_disp);
+ MemRange addr_range(base, length);
if (!ptr_data->cpu_addr.contains(addr_range)) {
- LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+ LIBOFFLOAD_ERROR(c_bad_ptr_mem_alloc, base, length,
+ const_cast<void *>(ptr_data->cpu_addr.start()),
+ ptr_data->cpu_addr.length());
exit(1);
}
@@ -433,20 +714,24 @@ bool OffloadDescriptor::alloc_ptr_data(
bool OffloadDescriptor::find_ptr_data(
PtrData* &ptr_data,
- void *base,
+ void *in_base,
int64_t disp,
int64_t size,
+ bool is_targetptr,
bool report_error
)
{
// total length of base
- int64_t length = disp + size;
-
+ int64_t length = size;
+ char *base = reinterpret_cast<char *>(in_base) + disp;
+
OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
"length %lld\n", base, length);
// find existing association in pointer table
- ptr_data = m_device.find_ptr_data(base);
+ ptr_data = is_targetptr ?
+ m_device.find_targetptr_data(base) :
+ m_device.find_ptr_data(base);
if (ptr_data == 0) {
if (report_error) {
LIBOFFLOAD_ERROR(c_no_ptr_data, base);
@@ -464,7 +749,9 @@ bool OffloadDescriptor::find_ptr_data(
MemRange addr_range(base, length);
if (!ptr_data->cpu_addr.contains(addr_range)) {
if (report_error) {
- LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+ LIBOFFLOAD_ERROR(c_bad_ptr_mem_range, base, length,
+ const_cast<void *>(ptr_data->cpu_addr.start()),
+ ptr_data->cpu_addr.length());
exit(1);
}
OFFLOAD_TRACE(3, "Existing association partially overlaps with "
@@ -591,6 +878,7 @@ bool OffloadDescriptor::offload_stack_memory_manager(
PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
PersistDataList::iterator it_end;
int erase = 0;
+ uint64_t cur_thread_id = m_device.get_thread_id();
*is_new = false;
@@ -600,9 +888,11 @@ bool OffloadDescriptor::offload_stack_memory_manager(
if (stack_begin > it->stack_cpu_addr) {
// this stack data must be destroyed
- m_destroy_stack.push_front(cur_el.stack_ptr_data);
- it_end = it;
- erase++;
+ if (cur_thread_id == cur_el.thread_id) {
+ m_destroy_stack.push_front(cur_el.stack_ptr_data);
+ it_end = it;
+ erase++;
+ }
}
else if (stack_begin == it->stack_cpu_addr) {
if (routine_id != it-> routine_id) {
@@ -627,7 +917,8 @@ bool OffloadDescriptor::offload_stack_memory_manager(
return true;
}
}
- else if (stack_begin < it->stack_cpu_addr) {
+ else if (stack_begin < it->stack_cpu_addr &&
+ cur_thread_id == cur_el.thread_id) {
break;
}
}
@@ -638,7 +929,7 @@ bool OffloadDescriptor::offload_stack_memory_manager(
m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
}
// new stack table is created
- new_el = new PersistData(stack_begin, routine_id, buf_size);
+ new_el = new PersistData(stack_begin, routine_id, buf_size, cur_thread_id);
// create MIC buffer
COIRESULT res;
uint32_t buffer_flags = 0;
@@ -733,11 +1024,13 @@ bool OffloadDescriptor::setup_descriptors(
}
// dependencies
- m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
+ m_in_deps_allocated = m_vars_total + 1;
+ m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_in_deps_allocated);
if (m_in_deps == NULL)
LIBOFFLOAD_ERROR(c_malloc);
if (m_vars_total > 0) {
- m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
+ m_out_deps_allocated = m_vars_total;
+ m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_out_deps_allocated);
if (m_out_deps == NULL)
LIBOFFLOAD_ERROR(c_malloc);
}
@@ -752,7 +1045,7 @@ bool OffloadDescriptor::setup_descriptors(
for (int i = 0; i < m_vars_total; i++) {
void* alloc_base = NULL;
int64_t alloc_disp = 0;
- int64_t alloc_size;
+ int64_t alloc_size = 0;
bool src_is_for_mic = (m_vars[i].direction.out ||
m_vars[i].into == NULL);
@@ -787,25 +1080,41 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].count,
m_vars[i].ptr,
m_vars[i].into);
+ // If any varDesc flags bits set, show them
+ if (console_enabled >= 1 && m_vars[i].flags.bits != 0) {
+ trace_varDesc_flags(get_timer_data(), m_vars[i].flags);
+ }
+ // preallocated implies targetptr
+ if (m_vars[i].flags.preallocated) {
+ // targetptr preallocated alloc_if(1) may not be used with
+ // an in clause
+ if (m_vars[i].direction.in && m_vars[i].alloc_if) {
+ LIBOFFLOAD_ERROR(c_in_with_preallocated);
+ exit(1);
+ }
+ m_vars[i].flags.targetptr = 1;
+ }
if (m_vars[i].alloc != NULL) {
// array descriptor
- const arr_desc *ap =
- static_cast<const arr_desc*>(m_vars[i].alloc);
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars[i].alloc);
// debug dump
- __arr_desc_dump(" ", "ALLOC", ap, 0);
+ ARRAY_DESC_DUMP(" ", "ALLOC", ap, 0, 1);
__arr_data_offset_and_length(ap, alloc_disp, alloc_size);
alloc_base = reinterpret_cast<void*>(ap->base);
}
+ m_vars_extra[i].alloc = m_vars[i].alloc;
m_vars_extra[i].cpu_disp = 0;
m_vars_extra[i].cpu_offset = 0;
m_vars_extra[i].src_data = 0;
m_vars_extra[i].read_rng_src = 0;
m_vars_extra[i].read_rng_dst = 0;
+ m_vars_extra[i].omp_last_event_type = c_last_not;
// flag is_arr_ptr_el is 1 only for var_descs generated
// for c_data_ptr_array type
if (i < vars_total) {
@@ -815,7 +1124,7 @@ bool OffloadDescriptor::setup_descriptors(
switch (m_vars[i].type.src) {
case c_data_ptr_array:
{
- const arr_desc *ap;
+ const Arr_Desc *ap;
const VarDesc3 *vd3 =
static_cast<const VarDesc3*>(m_vars[i].ptr);
int flags = vd3->array_fields;
@@ -824,32 +1133,33 @@ bool OffloadDescriptor::setup_descriptors(
OFFLOAD_TRACE(2,
" pointer array type is %s\n",
vardesc_type_as_string[flags & 0x3f]);
- ap = static_cast<const arr_desc*>(vd3->ptr_array);
- __arr_desc_dump(" ", "ptr array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->ptr_array);
+ ARRAY_DESC_DUMP(" ", "ptr array", ap,
+ m_vars[i].flags.is_pointer, 1);
if (m_vars[i].into) {
- ap = static_cast<const arr_desc*>(m_vars[i].into);
- __arr_desc_dump(
- " ", "into array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(m_vars[i].into);
+ ARRAY_DESC_DUMP(
+ " ", "into array", ap, 0, 1);
}
if ((flags & (1<<flag_align_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->align_array);
- __arr_desc_dump(
- " ", "align array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->align_array);
+ ARRAY_DESC_DUMP(
+ " ", "align array", ap, 0, 1);
}
if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
- __arr_desc_dump(
- " ", "alloc_if array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_if_array);
+ ARRAY_DESC_DUMP(
+ " ", "alloc_if array", ap, 0, 1);
}
if ((flags & (1<<flag_free_if_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->free_if_array);
- __arr_desc_dump(
- " ", "free_if array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->free_if_array);
+ ARRAY_DESC_DUMP(
+ " ", "free_if array", ap, 0, 1);
}
if ((flags & (1<<flag_extent_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->extent_start);
- __arr_desc_dump(
- " ", "extent_start array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->extent_start);
+ ARRAY_DESC_DUMP(
+ " ", "extent_start array", ap, 0, 1);
} else if ((flags &
(1<<flag_extent_start_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -857,10 +1167,10 @@ bool OffloadDescriptor::setup_descriptors(
(int64_t)vd3->extent_start);
}
if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>
+ ap = static_cast<const Arr_Desc*>
(vd3->extent_elements);
- __arr_desc_dump(
- " ", "extent_elements array", ap, 0);
+ ARRAY_DESC_DUMP(" ",
+ "extent_elements array", ap, 0, 1);
} else if ((flags &
(1<<flag_extent_elements_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -868,9 +1178,9 @@ bool OffloadDescriptor::setup_descriptors(
(int64_t)vd3->extent_elements);
}
if ((flags & (1<<flag_into_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->into_start);
- __arr_desc_dump(
- " ", "into_start array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->into_start);
+ ARRAY_DESC_DUMP(
+ " ", "into_start array", ap, 0, 1);
} else if ((flags &
(1<<flag_into_start_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -878,9 +1188,9 @@ bool OffloadDescriptor::setup_descriptors(
(int64_t)vd3->into_start);
}
if ((flags & (1<<flag_into_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->into_elements);
- __arr_desc_dump(
- " ", "into_elements array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->into_elements);
+ ARRAY_DESC_DUMP(
+ " ", "into_elements array", ap, 0, 1);
} else if ((flags &
(1<<flag_into_elements_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -888,9 +1198,9 @@ bool OffloadDescriptor::setup_descriptors(
(int64_t)vd3->into_elements);
}
if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_start);
- __arr_desc_dump(
- " ", "alloc_start array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_start);
+ ARRAY_DESC_DUMP(
+ " ", "alloc_start array", ap, 0, 1);
} else if ((flags &
(1<<flag_alloc_start_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -898,9 +1208,9 @@ bool OffloadDescriptor::setup_descriptors(
(int64_t)vd3->alloc_start);
}
if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_elements);
- __arr_desc_dump(
- " ", "alloc_elements array", ap, 0);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_elements);
+ ARRAY_DESC_DUMP(" ",
+ "alloc_elements array", ap, 0, 1);
} else if ((flags &
(1<<flag_alloc_elements_is_scalar)) != 0) {
OFFLOAD_TRACE(2,
@@ -922,11 +1232,11 @@ bool OffloadDescriptor::setup_descriptors(
// VarDesc.disp will have an offset from base
if (m_vars[i].type.src == c_cean_var) {
// array descriptor
- const arr_desc *ap =
- static_cast<const arr_desc*>(m_vars[i].ptr);
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars[i].ptr);
// debug dump
- __arr_desc_dump("", "IN/OUT", ap, 0);
+ ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic);
// offset and length are derived from the array descriptor
__arr_data_offset_and_length(ap, m_vars[i].disp,
@@ -961,7 +1271,7 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].ptr,
m_vars[i].disp,
m_vars[i].size,
- false)) {
+ false, false)) {
return false;
}
@@ -983,10 +1293,11 @@ bool OffloadDescriptor::setup_descriptors(
if (m_is_openmp) {
if (m_vars[i].flags.is_static) {
- // Static data is transferred only by omp target
+ // Static data is transferred either by omp target
// update construct which passes zeros for
- // alloc_if and free_if.
- if (m_vars[i].alloc_if || m_vars[i].free_if) {
+ // alloc_if and free_if or by always modifier.
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if)) {
m_vars[i].direction.bits = c_parameter_nocopy;
}
}
@@ -1004,10 +1315,12 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].ptr);
}
- // For automatic variables data is transferred
- // only if alloc_if == 0 && free_if == 0
- // or reference count is 1
- if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
+ // For automatic variables data is transferred:
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if) &&
auto_data != 0 &&
auto_data->get_reference() != 1) {
m_vars[i].direction.bits = c_parameter_nocopy;
@@ -1088,8 +1401,12 @@ bool OffloadDescriptor::setup_descriptors(
}
m_vars[i].size = m_destroy_stack.size();
m_vars_extra[i].src_data = m_stack_ptr_data;
- // need to add reference for buffer
- m_need_runfunction = true;
+
+ // need to add or remove references for stack buffer at target
+ if (is_new || m_destroy_stack.size()) {
+ m_need_runfunction = true;
+ }
+
break;
}
/* fallthru */
@@ -1098,11 +1415,11 @@ bool OffloadDescriptor::setup_descriptors(
case c_dv_ptr:
if (m_vars[i].type.src == c_cean_var_ptr) {
// array descriptor
- const arr_desc *ap =
- static_cast<const arr_desc*>(m_vars[i].ptr);
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars[i].ptr);
// debug dump
- __arr_desc_dump("", "IN/OUT", ap, 1);
+ ARRAY_DESC_DUMP("", "IN/OUT", ap, 1, !src_is_for_mic);
// offset and length are derived from the array descriptor
__arr_data_offset_and_length(ap, m_vars[i].disp,
@@ -1145,9 +1462,10 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].free_if) {
PtrData *ptr_data;
- // check that buffer length >= 0
+ // check that buffer length > 0
if (m_vars[i].alloc_if &&
- m_vars[i].disp + m_vars[i].size < 0) {
+ m_vars[i].disp + m_vars[i].size <
+ (m_is_openmp ? 0 : 1)) {
LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
exit(1);
}
@@ -1166,20 +1484,34 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].flags.sink_addr = 1;
}
else if (m_vars[i].alloc_if) {
+ if (m_vars[i].flags.preallocated) {
+ m_out_datalen += sizeof(void*);
+ m_need_runfunction = true;
+ break;
+ }
// add new entry
if (!alloc_ptr_data(
ptr_data,
- base,
+ reinterpret_cast<char *>(base) + alloc_disp,
(alloc_base != NULL) ?
alloc_disp : m_vars[i].disp,
(alloc_base != NULL) ?
alloc_size : m_vars[i].size,
alloc_disp,
(alloc_base != NULL) ?
- 0 : m_vars[i].align)) {
+ 0 : m_vars[i].align,
+ m_vars[i].flags.targetptr,
+ 0,
+ m_vars[i].flags.pin)) {
return false;
}
-
+ if (m_vars[i].flags.targetptr) {
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+ *static_cast<void**>(m_vars[i].ptr) = base =
+ reinterpret_cast<void*>(ptr_data->mic_addr);
+ }
if (ptr_data->add_reference() == 0 &&
ptr_data->mic_buf != 0) {
// add buffer to the list of buffers that
@@ -1187,12 +1519,14 @@ bool OffloadDescriptor::setup_descriptors(
m_compute_buffers.push_back(
ptr_data->mic_buf);
}
- else {
+ else if (!m_vars[i].flags.pin &&
+ !m_vars[i].flags.preallocated) {
// will send buffer address to device
m_vars[i].flags.sink_addr = 1;
}
- if (!ptr_data->is_static) {
+ if (!m_vars[i].flags.pin &&
+ !ptr_data->is_static) {
// need to add reference for buffer
m_need_runfunction = true;
}
@@ -1202,8 +1536,9 @@ bool OffloadDescriptor::setup_descriptors(
if (m_is_openmp) {
// For omp target update variable is ignored
// if it does not exist.
- if (!m_vars[i].alloc_if &&
- !m_vars[i].free_if) {
+ if (m_vars[i].flags.always_copy ||
+ (!m_vars[i].alloc_if &&
+ !m_vars[i].free_if)) {
error_if_not_found = false;
}
}
@@ -1213,6 +1548,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
m_vars[i].disp,
m_vars[i].size,
+ m_vars[i].flags.targetptr,
error_if_not_found)) {
return false;
}
@@ -1235,9 +1571,10 @@ bool OffloadDescriptor::setup_descriptors(
// data is transferred only if
// alloc_if == 0 && free_if == 0
// or reference count is 1
- if ((m_vars[i].alloc_if ||
- m_vars[i].free_if) &&
- ptr_data->get_reference() != 1) {
+ if (!m_vars[i].flags.always_copy &&
+ ((m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ ptr_data->get_reference() != 1)) {
m_vars[i].direction.bits =
c_parameter_nocopy;
}
@@ -1257,7 +1594,8 @@ bool OffloadDescriptor::setup_descriptors(
m_in_datalen += sizeof(ptr_data->mic_addr);
}
- if (!ptr_data->is_static && m_vars[i].free_if) {
+ if (!m_vars[i].flags.pin &&
+ !ptr_data->is_static && m_vars[i].free_if) {
// need to decrement buffer reference on target
m_need_runfunction = true;
}
@@ -1277,7 +1615,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
m_vars[i].disp,
m_vars[i].size,
- false)) {
+ false, false)) {
return false;
}
if (ptr_data) {
@@ -1308,8 +1646,8 @@ bool OffloadDescriptor::setup_descriptors(
case c_dv_ptr_data_slice:
ArrDesc *dvp;
if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
- const arr_desc *ap;
- ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+ const Arr_Desc *ap;
+ ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
dvp = (m_vars[i].type.src == c_dv_data_slice) ?
reinterpret_cast<ArrDesc*>(ap->base) :
@@ -1331,13 +1669,13 @@ bool OffloadDescriptor::setup_descriptors(
if (m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
m_vars[i].free_if) {
- const arr_desc *ap;
+ const Arr_Desc *ap;
if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
- ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+ ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
// debug dump
- __arr_desc_dump("", "IN/OUT", ap, 0);
+ ARRAY_DESC_DUMP("", "IN/OUT", ap, 0, !src_is_for_mic);
}
if (!__dv_is_contiguous(dvp)) {
m_vars[i].flags.is_noncont_src = 1;
@@ -1393,14 +1731,17 @@ bool OffloadDescriptor::setup_descriptors(
// add new entry
if (!alloc_ptr_data(
ptr_data,
- base,
+ reinterpret_cast<char *>(base) + alloc_disp,
(alloc_base != NULL) ?
alloc_disp : m_vars[i].disp,
(alloc_base != NULL) ?
alloc_size : m_vars[i].size,
alloc_disp,
(alloc_base != NULL) ?
- 0 : m_vars[i].align)) {
+ 0 : m_vars[i].align,
+ m_vars[i].flags.targetptr,
+ m_vars[i].flags.preallocated,
+ m_vars[i].flags.pin)) {
return false;
}
@@ -1426,8 +1767,9 @@ bool OffloadDescriptor::setup_descriptors(
if (m_is_openmp) {
// For omp target update variable is ignored
// if it does not exist.
- if (!m_vars[i].alloc_if &&
- !m_vars[i].free_if) {
+ if (m_vars[i].flags.always_copy ||
+ (!m_vars[i].alloc_if &&
+ !m_vars[i].free_if)) {
error_if_not_found = false;
}
}
@@ -1437,6 +1779,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
m_vars[i].disp,
m_vars[i].size,
+ m_vars[i].flags.targetptr,
error_if_not_found)) {
return false;
}
@@ -1457,10 +1800,12 @@ bool OffloadDescriptor::setup_descriptors(
if (ptr_data != 0) {
if (m_is_openmp) {
- // data is transferred only if
- // alloc_if == 0 && free_if == 0
- // or reference count is 1
- if ((m_vars[i].alloc_if ||
+ // data is transferred if
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if ||
m_vars[i].free_if) &&
ptr_data->get_reference() != 1) {
m_vars[i].direction.bits =
@@ -1503,7 +1848,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
m_vars[i].disp,
m_vars[i].size,
- false)) {
+ false, false)) {
return false;
}
m_vars[i].offset = !ptr_data ? 0 :
@@ -1551,11 +1896,11 @@ bool OffloadDescriptor::setup_descriptors(
if (m_vars[i].type.dst == c_cean_var) {
// array descriptor
- const arr_desc *ap =
- static_cast<const arr_desc*>(m_vars[i].into);
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars[i].into);
// debug dump
- __arr_desc_dump(" ", "INTO", ap, 0);
+ ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic);
// offset and length are derived from the array descriptor
__arr_data_offset_and_length(ap, into_disp, size);
@@ -1594,7 +1939,7 @@ bool OffloadDescriptor::setup_descriptors(
// find data associated with variable
if (!find_ptr_data(ptr_data, m_vars[i].into,
- into_disp, size, false)) {
+ into_disp, size, false, false)) {
return false;
}
if (ptr_data != 0) {
@@ -1648,11 +1993,11 @@ bool OffloadDescriptor::setup_descriptors(
if (m_vars[i].type.dst == c_cean_var_ptr) {
// array descriptor
- const arr_desc *ap =
- static_cast<const arr_desc*>(m_vars[i].into);
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars[i].into);
// debug dump
- __arr_desc_dump(" ", "INTO", ap, 1);
+ ARRAY_DESC_DUMP(" ", "INTO", ap, 1, src_is_for_mic);
// offset and length are derived from the array descriptor
__arr_data_offset_and_length(ap, into_disp, size);
@@ -1713,20 +2058,34 @@ bool OffloadDescriptor::setup_descriptors(
m_vars[i].flags.sink_addr = 1;
}
else if (m_vars[i].alloc_if) {
+ if (m_vars[i].flags.preallocated) {
+ m_out_datalen += sizeof(void*);
+ m_need_runfunction = true;
+ break;
+ }
// add new entry
if (!alloc_ptr_data(
ptr_data,
- base,
+ reinterpret_cast<char *>(base) + alloc_disp,
(alloc_base != NULL) ?
alloc_disp : into_disp,
(alloc_base != NULL) ?
alloc_size : size,
alloc_disp,
(alloc_base != NULL) ?
- 0 : m_vars[i].align)) {
+ 0 : m_vars[i].align,
+ m_vars[i].flags.targetptr,
+ m_vars[i].flags.preallocated,
+ m_vars[i].flags.pin)) {
return false;
}
-
+ if (m_vars[i].flags.targetptr) {
+ if (!init_mic_address(ptr_data)) {
+ return false;
+ }
+ *static_cast<void**>(m_vars[i].into) = base =
+ reinterpret_cast<void*>(ptr_data->mic_addr);
+ }
if (ptr_data->add_reference() == 0 &&
ptr_data->mic_buf != 0) {
// add buffer to the list of buffers that
@@ -1746,7 +2105,8 @@ bool OffloadDescriptor::setup_descriptors(
}
else {
// use existing association from pointer table
- if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+ if (!find_ptr_data(ptr_data, base, into_disp,
+ size, m_vars[i].flags.targetptr, true)) {
return false;
}
m_vars[i].flags.sink_addr = 1;
@@ -1780,7 +2140,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
into_disp,
m_vars[i].size,
- false)) {
+ false, false)) {
return false;
}
}
@@ -1806,17 +2166,17 @@ bool OffloadDescriptor::setup_descriptors(
if (m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
m_vars[i].free_if) {
- const arr_desc *ap;
+ const Arr_Desc *ap;
ArrDesc *dvp;
PtrData *ptr_data;
int64_t disp;
int64_t size;
if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
- ap = static_cast<const arr_desc*>(m_vars[i].into);
+ ap = static_cast<const Arr_Desc*>(m_vars[i].into);
// debug dump
- __arr_desc_dump(" ", "INTO", ap, 0);
+ ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic);
dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
reinterpret_cast<ArrDesc*>(ap->base) :
@@ -1889,14 +2249,17 @@ bool OffloadDescriptor::setup_descriptors(
// add new entry
if (!alloc_ptr_data(
ptr_data,
- base,
+ reinterpret_cast<char *>(base) + alloc_disp,
(alloc_base != NULL) ?
alloc_disp : into_disp,
(alloc_base != NULL) ?
alloc_size : size,
alloc_disp,
(alloc_base != NULL) ?
- 0 : m_vars[i].align)) {
+ 0 : m_vars[i].align,
+ m_vars[i].flags.targetptr,
+ m_vars[i].flags.preallocated,
+ m_vars[i].flags.pin)) {
return false;
}
if (ptr_data->add_reference() == 0 &&
@@ -1918,7 +2281,8 @@ bool OffloadDescriptor::setup_descriptors(
}
else {
// use existing association from pointer table
- if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+ if (!find_ptr_data(ptr_data, base, into_disp,
+ size, m_vars[i].flags.targetptr, true)) {
return false;
}
@@ -1958,7 +2322,7 @@ bool OffloadDescriptor::setup_descriptors(
base,
into_disp,
size,
- false)) {
+ false, false)) {
return false;
}
into_offset = !ptr_data ?
@@ -2062,9 +2426,10 @@ bool OffloadDescriptor::setup_misc_data(const char *name)
if (m_func_desc == NULL)
LIBOFFLOAD_ERROR(c_malloc);
m_func_desc->console_enabled = console_enabled;
- m_func_desc->timer_enabled =
- timer_enabled || (offload_report_level && offload_report_enabled);
- m_func_desc->offload_report_level = offload_report_level;
+ m_func_desc->timer_enabled = offload_report_enabled &&
+ (timer_enabled || offload_report_level);
+ m_func_desc->offload_report_level = offload_report_enabled ?
+ offload_report_level : 0;
m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
m_func_desc->in_datalen = m_in_datalen;
m_func_desc->out_datalen = m_out_datalen;
@@ -2078,35 +2443,193 @@ bool OffloadDescriptor::setup_misc_data(const char *name)
return true;
}
+// Chooses the OpenMP async last-event type and tags the matching variable.
+void OffloadDescriptor::setup_omp_async_info()
+{
+    OFFLOAD_TRACE(2, "setup_omp_async_info\n");
+    OmpAsyncLastEventType event_type = m_need_runfunction ?
+                                   c_last_runfunc : c_last_write;
+    int last_in = m_need_runfunction ? 0 : -1;  // -1: no "in" variable seen yet
+    int i;
+    // Scan backwards; the scan stops at the first "out" variable it meets.
+    for (i = m_vars_total - 1; i >= 0; i--) {
+        switch (m_vars[i].type.dst) {
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    m_vars[i].flags.is_static_dstn) {
+                    event_type = c_last_read;
+                }
+                else if (last_in < 0 && m_vars[i].direction.in &&
+                         m_vars[i].flags.is_static_dstn) {
+                    last_in = i;
+                }
+                break;
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.out) {
+                    event_type = c_last_read;
+                }
+                else if (last_in < 0 && m_vars[i].direction.in) {
+                    last_in = i;
+                }
+                break;
+            default:
+                break;
+        }
+        if (event_type == c_last_read) {
+            break;
+        }
+    }
+    if (event_type == c_last_read) {
+        m_vars_extra[i].omp_last_event_type = c_last_read;
+    }
+    // last_in stays -1 when no "in" variable was found; never index with it
+    else if (event_type == c_last_write && last_in >= 0) {
+        m_vars_extra[last_in].omp_last_event_type = c_last_write;
+    }
+    m_omp_async_last_event_type = event_type;
+    OFFLOAD_TRACE(2, "setup_omp_async_info: event_type=%d\n",
+                  m_omp_async_last_event_type);
+}
+
+extern "C" {
+    // Callback that register_omp_event_call_back() hands to
+    // COI::EventRegisterCallback; currently a no-op ignoring its arguments.
+    void offload_proxy_task_completed_ooo(COIEVENT e,
+                                          const COIRESULT r,
+                                          const void *info)
+    {
+        /* TODO: Call callback function, pass info. */
+    }
+}
+
+// Registers offload_proxy_task_completed_ooo as the callback of *event,
+// forwarding info, provided COI::EventRegisterCallback is non-null.
+void OffloadDescriptor::register_omp_event_call_back(const COIEVENT *event,
+                                                     const void *info)
+{
+    OFFLOAD_TRACE(2, "register_omp_event_call_back(event=%p, info=%p)\n",
+                  event, info);
+    if (!COI::EventRegisterCallback) {
+        return;
+    }
+    COI::EventRegisterCallback(*event, &offload_proxy_task_completed_ooo,
+                               info, 0);
+    OFFLOAD_TRACE(2,
+                  "COI::EventRegisterCallback found; callback registered\n");
+}
+
bool OffloadDescriptor::wait_dependencies(
- const void **waits,
- int num_waits
+ const void **waits, // signals to wait for; indexed only by the signal loop
+ int num_waits, // 0: nothing to wait for; -1: wait for stream(s)
+ _Offload_stream handle // used when num_waits == -1; 0 means all streams
)
{
OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
bool ret = true;
+ OffloadDescriptor *task;
+ if (num_waits == 0) {
+ return true;
+ }
- for (int i = 0; i < num_waits; i++) {
+ // wait for streams
+ if (num_waits == -1) {
+ Stream * stream;
+ // some specific stream of the device
+ if (handle != 0) {
+ stream = Stream::find_stream(handle, false);
- OffloadDescriptor *task = m_device.find_signal(waits[i], true);
- if (task == 0) {
- LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
- waits[i]);
- LIBOFFLOAD_ABORT;
- }
+ // the stream was not created or was destroyed
+ if (!stream) {
+ LIBOFFLOAD_ERROR(c_offload_no_stream, m_device.get_logical_index());
+ LIBOFFLOAD_ABORT;
+ }
+ task = stream->get_last_offload();
- if (!task->offload_finish()) {
- ret = false;
+ // offload was completed by previous offload_wait pragma
+ // or wait clause
+ if (task == 0) {
+ return true;
+ }
+ if (!task->offload_finish(0)) { //arg is 0 for is_traceback
+ ret = false;
+ }
+ task->cleanup();
+ stream->set_last_offload(NULL);
+ delete task;
}
+ // all streams of the device or over all devices
+ else {
+ StreamMap stream_map = Stream::all_streams; // iterates over a copy of the map
+ for (StreamMap::iterator it = stream_map.begin();
+ it != stream_map.end(); it++) {
+ Stream * stream = it->second;
- task->cleanup();
- delete task;
- }
+ if (!m_wait_all_devices &&
+ stream->get_device() != m_device.get_logical_index()) {
+ continue;
+ }
+ // get associated async task
+ OffloadDescriptor *task = stream->get_last_offload(); // NOTE: shadows the function-level 'task'
+ // offload was completed by offload_wait pragma or wait clause
+ if (task == 0) {
+ continue;
+ }
+ if (!task->offload_finish(0)) { //arg is 0 for is_traceback
+ ret = false;
+ }
+ task->cleanup();
+ stream->set_last_offload(NULL);
+ delete task;
+ }
+ // no uncompleted streams
+ return true; // NOTE(review): returns true even if ret was set to false above - confirm intended
+ }
+ }
+ else {
+ // if handle is equal to no_stream it's wait for signals
+ for (int i = 0; i < num_waits; i++) {
+ _Offload_stream stream_handle;
+ Stream *stream;
+ task = m_device.find_signal(waits[i], true);
+ if (task == 0) {
+ LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
+ waits[i]);
+ LIBOFFLOAD_ABORT;
+ }
+ else if (task == SIGNAL_IS_REMOVED) {
+ continue; // skip signals find_signal reports as SIGNAL_IS_REMOVED
+ }
+ if (!task->offload_finish(0)) { //arg is 0 for is_traceback
+ ret = false;
+ }
+ task->cleanup();
+ // if the offload both has signal and is last offload of its
+ // stream, we must wipe out the "last_offload" reference as
+ // the offload already is finished.
+ stream_handle = task->m_stream;
+ if (stream_handle != -1) { // presumably -1 is no_stream - TODO confirm
+ stream = Stream::find_stream(stream_handle, false);
+ if (stream && stream->get_last_offload() == task) {
+ stream->set_last_offload(NULL);
+ }
+ }
+ delete task;
+ }
+ }
return ret;
}
-bool OffloadDescriptor::offload(
+bool OffloadDescriptor::offload_wrap(
const char *name,
bool is_empty,
VarDesc *vars,
@@ -2116,19 +2639,73 @@ bool OffloadDescriptor::offload(
int num_waits,
const void **signal,
int entry_id,
- const void *stack_addr
+ const void *stack_addr,
+ OffloadFlags offload_flags
)
{
+ OffloadWaitKind wait_kind = c_offload_wait_signal;
+ bool is_traceback = offload_flags.bits.fortran_traceback;
+
+ // define kind of wait if any;
+ // there can be one of the following kinds:
+ // 1. c_offload_wait_signal for "offload_wait wait(signal)"
+ // 2. c_offload_wait_stream for "offload_wait stream(stream)"
+ // 3. c_offload_wait_all_streams for "offload_wait stream(0)"
+ if (num_waits == -1) {
+ wait_kind = (m_stream == 0) ?
+ c_offload_wait_all_streams :
+ c_offload_wait_stream;
+ }
+ char buf[35];
+ const char *stream_str;
+
+ if (m_stream == no_stream || num_waits >= 0) {
+ stream_str = "none";
+ }
+ else if (m_stream == 0) {
+ stream_str = "all";
+ }
+ else {
+ sprintf(buf, "%#llx", m_stream);
+ stream_str = buf;
+ }
+
if (signal == 0) {
OFFLOAD_DEBUG_TRACE_1(1,
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_init_func,
"Offload function %s, is_empty=%d, #varDescs=%d, "
- "#waits=%d, signal=none\n",
- name, is_empty, vars_total, num_waits);
- OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
- c_offload_sent_pointer_data,
- "#Wait : %d \n", num_waits);
+ "signal=none, stream=%s, #waits=%d%c",
+ name, is_empty, vars_total, stream_str, num_waits,
+ num_waits == 0 ? '\n' : ' ');
+ // Breaks the norm of using OFFLOAD_DEBUG_TRACE to print the waits
+ // since the number of waits is not fixed.
+ if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
+ if (num_waits) {
+ printf("(");
+ if (m_stream == no_stream) {
+ printf("%p", waits[0]);
+ for (int i = 1; i < num_waits; i++) {
+ printf(", %p", waits[i]);
+ }
+ }
+ else if (m_stream != 0) {
+ printf("%#x", m_stream);
+ }
+ else {
+ printf(" all streams");
+ }
+ printf(")");
+ }
+ printf("\n");
+ fflush(NULL);
+ }
+ // stream in wait is reported further in OFFLOAD_REPORT for waits
+ if (m_stream != no_stream && num_waits == 0) {
+ OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+ c_offload_stream,
+ "%d\n", m_stream);
+ }
OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_signal,
"none %d\n", 0);
@@ -2138,27 +2715,62 @@ bool OffloadDescriptor::offload(
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_init_func,
"Offload function %s, is_empty=%d, #varDescs=%d, "
- "#waits=%d, signal=%p\n",
- name, is_empty, vars_total, num_waits,
- *signal);
-
+ "signal=%p, stream=%s, #waits=%d%c",
+ name, is_empty, vars_total, *signal, stream_str, num_waits,
+ num_waits == 0 ? '\n' : ' ');
+ // Breaks the norm of using OFFLOAD_DEBUG_TRACE to print the waits
+ // since the number of waits is not fixed.
+ if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
+ if (num_waits) {
+ printf("(");
+ if (m_stream == no_stream) {
+ printf("%p", waits[0]);
+ for (int i = 1; i < num_waits; i++) {
+ printf(", %p", waits[i]);
+ }
+ printf(")");
+ }
+ else if (m_stream != 0) {
+ printf("%#x", m_stream);
+ }
+ else {
+ printf(" all streams");
+ }
+ printf(")");
+ }
+ printf("\n");
+ fflush(NULL);
+ }
+ // stream in wait is reported further in OFFLOAD_REPORT for waits
+ if (m_stream != no_stream && num_waits == 0) {
+ OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+ c_offload_stream,
+ "%d\n", m_stream);
+ }
OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_signal,
"%d\n", signal);
}
+ if (console_enabled >= 1 && offload_flags.flags != 0) {
+ trace_offload_flags(get_timer_data(), offload_flags);
+ }
+
OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
- c_offload_wait,
- "#Wait : %d %p\n", num_waits, waits);
+ c_offload_wait, "%d\n",
+ wait_kind, num_waits,
+ (wait_kind == c_offload_wait_signal) ?
+ waits :
+ reinterpret_cast<const void **>(m_stream));
if (m_status != 0) {
m_status->result = OFFLOAD_SUCCESS;
m_status->device_number = m_device.get_logical_index();
}
- m_need_runfunction = !is_empty;
+ m_initial_need_runfunction = m_need_runfunction = !is_empty;
// wait for dependencies to finish
- if (!wait_dependencies(waits, num_waits)) {
+ if (!wait_dependencies(waits, num_waits, m_stream)) {
cleanup();
return false;
}
@@ -2169,8 +2781,13 @@ bool OffloadDescriptor::offload(
return false;
}
+ if (offload_flags.bits.omp_async) {
+ setup_omp_async_info();
+ }
+
// initiate send for pointers. Want to do it as early as possible.
- if (!send_pointer_data(signal != 0)) {
+ if (!send_pointer_data(signal != 0 || offload_flags.bits.omp_async,
+ signal)) {
cleanup();
return false;
}
@@ -2188,25 +2805,46 @@ bool OffloadDescriptor::offload(
}
// Start the computation
- if (!compute()) {
+ if (!compute(signal)) {
cleanup();
return false;
}
// initiate receive for pointers
- if (!receive_pointer_data(signal != 0)) {
+ if (!receive_pointer_data(signal != 0 || offload_flags.bits.omp_async,
+ true, signal)) {
cleanup();
return false;
}
-
- // if there is a signal save descriptor for the later use.
- if (signal != 0) {
- m_device.add_signal(*signal, this);
+ if (offload_flags.bits.omp_async) {
return true;
}
+ // if there is a signal or stream save descriptor for the later use.
+ // num_waits == -1 is for offload_wait and there is nothing to save
+ if (num_waits != -1 && (signal != 0 || m_stream != no_stream)) {
+ if (signal != 0) {
+ m_device.add_signal(*signal, this);
+ }
+
+ if (m_stream != no_stream && m_stream != 0) {
+ Stream* stream = Stream::find_stream(m_stream, false);
+ if (stream) {
+ stream->set_last_offload(this);
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_offload_no_stream, m_device.get_logical_index());
+ LIBOFFLOAD_ABORT;
+ }
+ }
+ // if there is a clause with alloc_if(1) and preallocated need to call
+ // offload_finish after runfunction
+ if (!m_preallocated_alloc) {
+ return true;
+ }
+ }
// wait for the offload to finish.
- if (!offload_finish()) {
+ if (!offload_finish(is_traceback)) {
cleanup();
return false;
}
@@ -2215,7 +2853,38 @@ bool OffloadDescriptor::offload(
return true;
}
-bool OffloadDescriptor::offload_finish()
+// Entry point: runs offload_wrap() and, if it fails, triggers the Fortran
+// traceback once when offload_flags requests it.
+bool OffloadDescriptor::offload(
+    const char *name,
+    bool is_empty,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    const void **waits,
+    int num_waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr,
+    OffloadFlags offload_flags
+)
+{
+    const bool res = offload_wrap(name, is_empty, vars, vars2, vars_total,
+                                  waits, num_waits, signal, entry_id,
+                                  stack_addr, offload_flags);
+    if (!res && !m_traceback_called && offload_flags.bits.fortran_traceback) {
+        OFFLOAD_TRACE(3,
+            "Calling Fortran library to continue traceback from MIC\n");
+        // m_status may be null (it is null-checked elsewhere in this class)
+        FORTRAN_TRACE_BACK(m_status != 0 ? m_status->result : OFFLOAD_ERROR);
+        m_traceback_called = true;
+    }
+    return res;
+}
+
+bool OffloadDescriptor::offload_finish(
+ bool is_traceback
+)
{
COIRESULT res;
@@ -2235,10 +2904,24 @@ bool OffloadDescriptor::offload_finish()
}
if (res != COI_SUCCESS) {
- if (m_status != 0) {
+ if (m_status != 0 && !m_traceback_called) {
m_status->result = translate_coi_error(res);
+ if (is_traceback) {
+ OFFLOAD_TRACE(3,
+ "Calling Fortran library to continue traceback from MIC\n");
+ FORTRAN_TRACE_BACK(m_status->result);
+ m_traceback_called = true;
+ }
return false;
}
+
+ if (is_traceback && !m_traceback_called) {
+ OFFLOAD_TRACE(3,
+ "Calling Fortran library to continue traceback from MIC\n");
+ FORTRAN_TRACE_BACK(OFFLOAD_ERROR);
+ m_traceback_called = true;
+ }
+
report_coi_error(c_event_wait, res);
}
}
@@ -2247,6 +2930,13 @@ bool OffloadDescriptor::offload_finish()
if (!scatter_copyout_data()) {
return false;
}
+
+ if (m_out_with_preallocated &&
+ !receive_pointer_data(m_out_deps_total > 0, false, NULL)) {
+ cleanup();
+ return false;
+ }
+
// wait for receive dependencies to become signaled
if (m_out_deps_total > 0) {
OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
@@ -2320,24 +3010,50 @@ bool OffloadDescriptor::is_signaled()
return signaled;
}
+// Returns a malloc'ed rank-1 Arr_Desc with stride 1 whose single dimension
+// runs from extent_start_val to extent_start_val + extent_elements_val - 1;
+// base is set from ptr_val and dim[0].size from size.
+static Arr_Desc * make_arr_desc(void* ptr_val, int64_t extent_start_val,
+                                int64_t extent_elements_val, int64_t size)
+{
+    Arr_Desc *res = static_cast<Arr_Desc*>(malloc(sizeof(Arr_Desc)));
+    if (res == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        // LIBOFFLOAD_ERROR appears to return (other call sites follow it
+        // with exit/abort), so stop here instead of dereferencing NULL.
+        LIBOFFLOAD_ABORT;
+    }
+    res->base = reinterpret_cast<int64_t>(ptr_val);
+    res->rank = 1;
+    res->dim[0].size = size;
+    res->dim[0].lindex = 0;
+    res->dim[0].lower = extent_start_val;
+    res->dim[0].upper = extent_elements_val + extent_start_val - 1;
+    res->dim[0].stride = 1;
+    return res;
+}
+
// Send pointer data if source or destination or both of them are
// noncontiguous. There is guarantee that length of destination enough for
-// transfered data.
+// transferred data.
bool OffloadDescriptor::send_noncontiguous_pointer_data(
int i,
PtrData* src_data,
PtrData* dst_data,
- COIEVENT *event
+ COIEVENT *event,
+ uint64_t &data_sent,
+ uint32_t in_deps_amount,
+ COIEVENT *in_deps
)
{
int64_t offset_src, offset_dst;
int64_t length_src, length_dst;
int64_t length_src_cur, length_dst_cur;
- int64_t send_size, data_sent = 0;
+ int64_t send_size;
COIRESULT res;
bool dst_is_empty = true;
bool src_is_empty = true;
+ data_sent = 0;
+
// Set length_src and length_dst
length_src = (m_vars_extra[i].read_rng_src) ?
m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
@@ -2346,6 +3062,90 @@ bool OffloadDescriptor::send_noncontiguous_pointer_data(
m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
send_size = (length_src < length_dst) ? length_src : length_dst;
+ // If BufferWriteMultiD is defined we can set values of required arguments
+ // and transfer noncontiguous data via call to the COI routine.
+ if (__offload_use_coi_noncontiguous_transfer && COI::BufferWriteMultiD) {
+ struct Arr_Desc* arr_desc_dst;
+ struct Arr_Desc* arr_desc_src;
+ int64_t size_src, size_dst;
+ char *base = offload_get_src_base(static_cast<char*>(m_vars[i].ptr),
+ m_vars[i].type.src);
+ COIBUFFER dst_buf = m_vars[i].into ?
+ m_vars_extra[i].dst_data->mic_buf :
+ m_vars_extra[i].src_data->mic_buf;
+
+ offset_src = (m_vars_extra[i].read_rng_src)?
+ m_vars_extra[i].read_rng_src->init_offset : m_vars_extra[i].cpu_disp;
+ size_src = m_vars_extra[i].read_rng_src ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+ m_vars[i].size;
+
+ offset_dst = (m_vars_extra[i].read_rng_dst)?
+ m_vars_extra[i].read_rng_dst->init_offset : m_vars[i].disp;
+ size_dst = m_vars_extra[i].read_rng_dst ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) : m_vars[i].size;
+
+ int64_t el_size = (!m_vars[i].into ||
+ (m_vars_extra[i].read_rng_src && m_vars_extra[i].read_rng_dst)) ?
+ 1 :
+ m_vars_extra[i].read_rng_src ?
+ m_vars_extra[i].read_rng_src->arr_desc->dim[
+ m_vars_extra[i].read_rng_src->arr_desc->rank - 1].size :
+ m_vars_extra[i].read_rng_dst->arr_desc->dim[
+ m_vars_extra[i].read_rng_dst->arr_desc->rank - 1].size;
+
+ arr_desc_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->arr_desc :
+ make_arr_desc(NULL, // don't required for source
+ offset_src/el_size, size_src/el_size, el_size);
+
+ arr_desc_dst = !m_vars[i].into ?
+ arr_desc_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->arr_desc :
+ make_arr_desc(NULL,
+ offset_dst/el_size, size_src/el_size, el_size);
+
+ int64_t alloc_disp = m_vars[i].into ?
+ m_vars_extra[i].dst_data->alloc_disp :
+ m_vars_extra[i].src_data->alloc_disp;
+
+ arr_desc_src->base = reinterpret_cast<int64_t>(base);
+ arr_desc_dst->base = 0;
+
+ res = COI::BufferWriteMultiD(
+ dst_buf, // in_DestBuffer,
+ m_device.get_process(), // DestProcess,
+ m_vars[i].offset + m_vars[i].mic_offset -
+ alloc_disp, // Offset
+ (void*)arr_desc_dst, // descriptor of DestArray
+ (void*)arr_desc_src, // descriptor of SrcArray
+ COI_COPY_UNSPECIFIED, // Type
+ in_deps_amount, // Number of in Dependencies
+ in_deps, // array of in Dependencies
+ event); // out Dependency
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_copy, res);
+ }
+ return(true);
+ }
+
+ // If an event is requested, one event is needed for every contiguous
+ // interval that will be copied/written, so grow m_in_deps accordingly.
+ // Take into account that we already have 1 event.
+ if (event) {
+ m_in_deps_allocated += (length_src / send_size) *
+ ((m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_max_number : 1) ;
+ m_in_deps =
+ (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * m_in_deps_allocated);
+ m_in_deps_total--;
+ }
+
// consequently get contiguous ranges,
// define corresponded destination offset and send data
do {
@@ -2402,17 +3202,20 @@ bool OffloadDescriptor::send_noncontiguous_pointer_data(
}
length_dst_cur -= send_size;
dst_is_empty = length_dst_cur == 0;
-
+
+ if (event) {
+ event = &m_in_deps[m_in_deps_total++];
+ }
if (src_data != 0 && src_data->cpu_buf != 0) {
res = COI::BufferCopy(
dst_data->mic_buf,
src_data->cpu_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + offset_dst,
m_vars_extra[i].cpu_offset + offset_src,
send_size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2428,12 +3231,12 @@ bool OffloadDescriptor::send_noncontiguous_pointer_data(
res = COI::BufferWrite(
dst_data->mic_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + offset_dst,
base + offset_src,
send_size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2443,21 +3246,87 @@ bool OffloadDescriptor::send_noncontiguous_pointer_data(
report_coi_error(c_buf_write, res);
}
}
- data_sent += length_src;
+ data_sent += send_size;
}
while (true);
return true;
}
-bool OffloadDescriptor::send_pointer_data(bool is_async)
+bool OffloadDescriptor::send_pointer_data(bool is_async, void* info)
{
OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
+ bool should_use_async_buffer_write = m_initial_need_runfunction;
uint64_t ptr_sent = 0;
COIRESULT res;
+ uint32_t in_deps_amount = 0;
+ COIEVENT *in_deps = NULL;
+
+ // For offload_transfer and offload with empty body without signal:
+ // - if there is only one buffer copy - send data synchronously
+ // - if there are multiple buffer copy and
+ // __offload_parallel_copy is false - send data synchronously
+ // - if there are multiple buffer copy and
+ // __offload_parallel_copy is true - send data asynchronously
+ // It concerns only big size data - greater than __offload_use_async_buffer_write.
+ // Data of size less than __offload_use_async_buffer_write are sent synchronously.
+ // Synchronous transfer results in better performance in COI.
+ // __offload_parallel_copy is false by default but can be changed
+ // via environment variable OFFLOAD_PARALLEL_COPY
+ if (!m_initial_need_runfunction && __offload_parallel_copy) {
+ int big_size_count = 0;
+ for (int i = 0; i < m_vars_total; i++) {
+ if (m_vars[i].direction.in &&
+ m_vars[i].size >= __offload_use_async_buffer_write) {
+ switch (m_vars[i].type.dst) {
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var:
+ if (m_vars[i].flags.is_static_dstn) {
+ big_size_count++;
+ }
+ break;
+ case c_string_ptr:
+ case c_data_ptr:
+ case c_cean_var_ptr:
+ case c_dv_ptr:
+ case c_dv_data:
+ case c_dv_ptr_data:
+ case c_dv_data_slice:
+ case c_dv_ptr_data_slice:
+ big_size_count++;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (big_size_count > 1) {
+ should_use_async_buffer_write = true;
+ }
+ }
+
+ if (m_stream != no_stream && m_vars_total != 0) {
+ get_stream_in_dependencies(in_deps_amount, in_deps);
+ }
// Initiate send for pointer data
for (int i = 0; i < m_vars_total; i++) {
+ uint64_t sent_data = m_vars[i].size;
+ uint32_t in_deps_amount_save;
+ COIEVENT *in_deps_save;
+
+ if (m_vars_extra[i].omp_last_event_type == c_last_write) {
+ in_deps_amount_save = in_deps_amount;
+ in_deps_save = in_deps;
+ in_deps_amount = m_in_deps_total;
+ if (in_deps_amount > 0) {
+ in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
+ if (in_deps == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ memcpy(in_deps, m_in_deps,in_deps_amount * sizeof(COIEVENT));
+ }
+ }
switch (m_vars[i].type.dst) {
case c_data_ptr_array:
break;
@@ -2468,7 +3337,8 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars[i].flags.is_static_dstn) {
COIEVENT *event =
(is_async ||
- m_vars[i].size >= __offload_use_async_buffer_write) ?
+ (should_use_async_buffer_write &&
+ m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
PtrData* dst_data = m_vars[i].into ?
m_vars_extra[i].dst_data :
@@ -2482,7 +3352,8 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
if (!send_noncontiguous_pointer_data(
- i, src_data, dst_data, event)) {
+ i, src_data, dst_data, event, sent_data,
+ in_deps_amount, in_deps)) {
return false;
}
}
@@ -2490,13 +3361,13 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
res = COI::BufferCopy(
dst_data->mic_buf,
src_data->cpu_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + m_vars[i].disp,
m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2511,12 +3382,12 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars[i].type.src);
res = COI::BufferWrite(
dst_data->mic_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + m_vars[i].disp,
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2526,7 +3397,7 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
report_coi_error(c_buf_write, res);
}
}
- ptr_sent += m_vars[i].size;
+ ptr_sent += sent_data;
}
break;
@@ -2537,7 +3408,8 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
if (m_vars[i].direction.in && m_vars[i].size > 0) {
COIEVENT *event =
(is_async ||
- m_vars[i].size >= __offload_use_async_buffer_write) ?
+ (should_use_async_buffer_write &&
+ m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
PtrData* dst_data = m_vars[i].into ?
m_vars_extra[i].dst_data :
@@ -2551,19 +3423,20 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
- i, src_data, dst_data, event);
+ i, src_data, dst_data, event, sent_data,
+ in_deps_amount, in_deps);
}
else if (src_data != 0 && src_data->cpu_buf != 0) {
res = COI::BufferCopy(
dst_data->mic_buf,
src_data->cpu_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + m_vars[i].disp,
m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2578,12 +3451,12 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars[i].type.src);
res = COI::BufferWrite(
dst_data->mic_buf,
- m_vars[i].mic_offset - dst_data->alloc_disp +
+ m_vars[i].mic_offset +
m_vars[i].offset + m_vars[i].disp,
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2594,7 +3467,7 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
}
}
- ptr_sent += m_vars[i].size;
+ ptr_sent += sent_data;
}
break;
@@ -2609,26 +3482,27 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
COIEVENT *event =
(is_async ||
- m_vars[i].size >= __offload_use_async_buffer_write) ?
+ (should_use_async_buffer_write &&
+ m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
- i, src_data, ptr_data, event);
+ i, src_data, ptr_data, event, sent_data,
+ in_deps_amount, in_deps);
}
else if (src_data && src_data->cpu_buf != 0) {
res = COI::BufferCopy(
ptr_data->mic_buf,
src_data->cpu_buf,
- m_vars[i].offset + ptr_data->mic_offset -
- ptr_data->alloc_disp +
+ m_vars[i].offset + ptr_data->mic_offset +
m_vars[i].disp,
m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2643,12 +3517,12 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars[i].type.src);
res = COI::BufferWrite(
ptr_data->mic_buf,
- ptr_data->mic_offset - ptr_data->alloc_disp +
+ ptr_data->mic_offset +
m_vars[i].offset + m_vars[i].disp,
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2658,7 +3532,7 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
report_coi_error(c_buf_write, res);
}
}
- ptr_sent += m_vars[i].size;
+ ptr_sent += sent_data;
}
break;
@@ -2678,25 +3552,27 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars_extra[i].src_data : 0;
COIEVENT *event =
(is_async ||
- m_vars[i].size >= __offload_use_async_buffer_write) ?
+ (should_use_async_buffer_write &&
+ m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
- i, src_data, dst_data, event);
+ i, src_data, dst_data, event, sent_data,
+ in_deps_amount, in_deps);
}
else if (src_data && src_data->cpu_buf != 0) {
res = COI::BufferCopy(
dst_data->mic_buf,
src_data->cpu_buf,
- m_vars[i].offset - dst_data->alloc_disp +
+ m_vars[i].offset +
dst_data->mic_offset +
m_vars[i].disp,
m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2711,12 +3587,12 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
m_vars[i].type.src);
res = COI::BufferWrite(
dst_data->mic_buf,
- dst_data->mic_offset - dst_data->alloc_disp +
+ dst_data->mic_offset +
m_vars[i].offset + m_vars[i].disp,
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- 0, 0,
+ in_deps_amount, in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -2727,14 +3603,18 @@ bool OffloadDescriptor::send_pointer_data(bool is_async)
}
}
- ptr_sent += m_vars[i].size;
+ ptr_sent += sent_data;
}
break;
default:
break;
}
-
+ if (m_vars_extra[i].omp_last_event_type == c_last_write) {
+ in_deps_amount = in_deps_amount_save;
+ in_deps = in_deps_save;
+ register_omp_event_call_back(&m_in_deps[m_in_deps_total - 1], info);
+ }
// alloc field isn't used at target.
// We can reuse it for offset of array pointers.
if (m_vars_extra[i].is_arr_ptr_el) {
@@ -2901,7 +3781,7 @@ bool OffloadDescriptor::gather_copyin_data()
return true;
}
-bool OffloadDescriptor::compute()
+bool OffloadDescriptor::compute(void *info)
{
OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
@@ -2926,12 +3806,21 @@ bool OffloadDescriptor::compute()
// dispatch task
COIRESULT res;
COIEVENT event;
- res = m_device.compute(m_compute_buffers,
+ uint32_t in_deps_amount = m_in_deps_total;
+ COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
+
+ if (0 == m_in_deps_total && m_stream != no_stream) {
+ get_stream_in_dependencies(in_deps_amount, in_deps);
+ }
+
+ res = m_device.compute(m_stream,
+ m_compute_buffers,
misc, misc_len,
ret, ret_len,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
&event);
+
if (res != COI_SUCCESS) {
if (m_status != 0) {
m_status->result = translate_coi_error(res);
@@ -2940,6 +3829,10 @@ bool OffloadDescriptor::compute()
report_coi_error(c_pipeline_run_func, res);
}
+ if (m_omp_async_last_event_type == c_last_runfunc) {
+ register_omp_event_call_back(&event, info);
+ }
+
m_in_deps_total = 1;
m_in_deps[0] = event;
}
@@ -2947,34 +3840,114 @@ bool OffloadDescriptor::compute()
return true;
}
-// recieve pointer data if source or destination or both of them are
+// receive pointer data if source or destination or both of them are
// noncontiguous. There is guarantee that length of destination enough for
-// transfered data.
-bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
+// transferred data.
+bool OffloadDescriptor::receive_noncontiguous_pointer_data(
int i,
- char* base,
COIBUFFER dst_buf,
- COIEVENT *event
+ COIEVENT *event,
+ uint64_t &received_data,
+ uint32_t in_deps_amount,
+ COIEVENT *in_deps
)
{
int64_t offset_src, offset_dst;
int64_t length_src, length_dst;
int64_t length_src_cur, length_dst_cur;
- int64_t recieve_size, data_recieved = 0;
+ int64_t receive_size;
COIRESULT res;
bool dst_is_empty = true;
bool src_is_empty = true;
+ char *base = offload_get_src_base(
+ m_vars[i].into ?
+ static_cast<char*>(m_vars[i].into) :
+ static_cast<char*>(m_vars[i].ptr),
+ m_vars[i].type.dst);
+ received_data = 0;
+
// Set length_src and length_dst
length_src = (m_vars_extra[i].read_rng_src) ?
m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
length_dst = !m_vars[i].into ? length_src :
(m_vars_extra[i].read_rng_dst) ?
m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
- recieve_size = (length_src < length_dst) ? length_src : length_dst;
-
+ receive_size = (length_src < length_dst) ? length_src : length_dst;
+
+ // If BufferReadMultiD is defined we can set values of required arguments
+ // and transfer noncontiguous data via call to the COI routine.
+ if (__offload_use_coi_noncontiguous_transfer && COI::BufferReadMultiD) {
+ struct Arr_Desc* arr_desc_dst;
+ struct Arr_Desc* arr_desc_src;
+ int64_t size_src, size_dst;
+
+ offset_src = (m_vars_extra[i].read_rng_src)?
+ m_vars_extra[i].read_rng_src->init_offset : m_vars[i].disp;
+ size_src = m_vars_extra[i].read_rng_src ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+ m_vars[i].size;
+
+ offset_dst = (m_vars_extra[i].read_rng_dst)?
+ m_vars_extra[i].read_rng_dst->init_offset : m_vars_extra[i].cpu_disp;
+ size_dst = m_vars_extra[i].read_rng_dst ?
+ cean_get_transf_size(m_vars_extra[i].read_rng_dst) : m_vars[i].size;
+
+ int64_t el_size = (!m_vars[i].into ||
+ (m_vars_extra[i].read_rng_src &&
+ m_vars_extra[i].read_rng_dst)) ?
+ 1 :
+ m_vars_extra[i].read_rng_src ?
+ m_vars_extra[i].read_rng_src->arr_desc->dim[
+ m_vars_extra[i].read_rng_src->arr_desc->rank - 1].size :
+ m_vars_extra[i].read_rng_dst->arr_desc->dim[
+ m_vars_extra[i].read_rng_dst->arr_desc->rank - 1].size;
+ arr_desc_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->arr_desc :
+ make_arr_desc(NULL, // don't required for source
+ offset_src/el_size, size_src/el_size,
+ el_size);
+ arr_desc_dst = !m_vars[i].into ? arr_desc_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->arr_desc :
+ make_arr_desc(NULL,
+ offset_dst/el_size, size_src/el_size, el_size);
+
+ arr_desc_dst->base = reinterpret_cast<int64_t>(base);
+
+ res = COI::BufferReadMultiD(
+ m_vars_extra[i].src_data->mic_buf, // SourceBuffer
+ m_vars[i].offset + m_vars[i].mic_offset -
+ m_vars_extra[i].src_data->alloc_disp, // Offset
+ (void*)arr_desc_dst, // descriptor of DestArray
+ (void*)arr_desc_src, // descriptor of SrcArray
+ COI_COPY_UNSPECIFIED, // Type
+ in_deps_amount, // Number of in Dependencies
+ in_deps, // array of in Dependencies
+ event); // out Dependency
+ if (res != COI_SUCCESS) {
+ if (m_status != 0) {
+ m_status->result = translate_coi_error(res);
+ return false;
+ }
+ report_coi_error(c_buf_copy, res);
+ }
+ return(true);
+ }
+ // If an event is requested, one event is needed for every contiguous
+ // interval that will be copied/read, so grow m_out_deps accordingly.
+ // Take into account that we already have 1 event.
+ if (event) {
+ m_out_deps_allocated += (length_src / receive_size) *
+ ((m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_max_number : 1) ;
+ m_out_deps =
+ (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_out_deps_allocated);
+ m_out_deps_total--;
+ }
+
// consequently get contiguous ranges,
- // define corresponded destination offset and recieve data
+ // define corresponded destination offset and receive data
do {
// get sorce offset
if (src_is_empty) {
@@ -2985,8 +3958,8 @@ bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
break;
}
}
- else if (data_recieved == 0) {
- offset_src = 0;
+ else if (received_data == 0) {
+ offset_src = m_vars[i].disp;
}
else {
break;
@@ -2996,9 +3969,9 @@ bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
else {
// if source is contiguous or its contiguous range is greater
// than destination one
- offset_src += recieve_size;
+ offset_src += receive_size;
}
- length_src_cur -= recieve_size;
+ length_src_cur -= receive_size;
src_is_empty = length_src_cur == 0;
// get destination offset
@@ -3027,23 +4000,24 @@ bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
else {
// if destination is contiguous or its contiguous range is greater
// than source one
- offset_dst += recieve_size;
+ offset_dst += receive_size;
}
- length_dst_cur -= recieve_size;
+ length_dst_cur -= receive_size;
dst_is_empty = length_dst_cur == 0;
-
+ if (event) {
+ event = &m_out_deps[m_out_deps_total++];
+ }
if (dst_buf != 0) {
res = COI::BufferCopy(
dst_buf,
m_vars_extra[i].src_data->mic_buf,
m_vars_extra[i].cpu_offset + offset_dst,
m_vars[i].offset + offset_src +
- m_vars[i].mic_offset -
- m_vars_extra[i].src_data->alloc_disp,
- recieve_size,
+ m_vars[i].mic_offset,
+ receive_size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3057,13 +4031,12 @@ bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
res = COI::BufferRead(
m_vars_extra[i].src_data->mic_buf,
m_vars[i].offset + offset_src +
- m_vars[i].mic_offset -
- m_vars_extra[i].src_data->alloc_disp,
+ m_vars[i].mic_offset,
base + offset_dst,
- recieve_size,
+ receive_size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3073,20 +4046,109 @@ bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
report_coi_error(c_buf_read, res);
}
}
- data_recieved += recieve_size;
+ received_data += receive_size;
}
while (true);
return true;
}
-bool OffloadDescriptor::receive_pointer_data(bool is_async)
+bool OffloadDescriptor::receive_pointer_data(bool is_async,
+ bool first_run, void *info)
{
OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
+ bool should_use_async_buffer_read = m_initial_need_runfunction;
uint64_t ptr_received = 0;
COIRESULT res;
+ // For offload_transfer and offload with empty body without signal:
+ // - if there is only one buffer copy - get data synchronously
+ // - if there are multiple buffer copy and
+ // __offload_parallel_copy is false - get data synchronously
+ // - if there are multiple buffer copy
+ // and __offload_parallel_copy is true - get data asynchronously
+ // It concerns only data with size greater than __offload_use_async_buffer_read.
+ // Data of size less than __offload_use_async_buffer_read are received synchronously.
+ // Synchronous transfer results in better performance in COI.
+ // __offload_parallel_copy is false by default but can be changed
+ // via environment variable OFFLOAD_PARALLEL_COPY
+ if (!m_initial_need_runfunction && __offload_parallel_copy) {
+ int big_size_count = 0;
+
+ for (int i = 0; i < m_vars_total; i++) {
+ if (m_vars[i].direction.out &&
+ m_vars[i].size >= __offload_use_async_buffer_read) {
+ // preallocated OUT only at second run
+ if (first_run == m_vars[i].flags.preallocated) {
+ continue;
+ }
+ switch (m_vars[i].type.src) {
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var:
+ if (m_vars[i].flags.is_static) {
+ big_size_count++;
+ }
+ break;
+ case c_string_ptr:
+ case c_data_ptr:
+ case c_cean_var_ptr:
+ case c_dv_data:
+ case c_dv_ptr_data:
+ case c_dv_data_slice:
+ case c_dv_ptr_data_slice:
+ case c_dv_ptr:
+ big_size_count++;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ if (big_size_count > 1) {
+ should_use_async_buffer_read = true;
+ }
+ }
+ uint32_t in_deps_amount = m_in_deps_total;
+ COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
+
+ if (0 == m_in_deps_total &&
+ m_stream != no_stream &&
+ m_vars_total != 0) {
+ get_stream_in_dependencies(in_deps_amount, in_deps);
+ }
+
for (int i = 0; i < m_vars_total; i++) {
+ uint64_t received_data = m_vars[i].size;
+ uint32_t in_deps_amount_save;
+ COIEVENT *in_deps_save;
+
+ if (m_vars_extra[i].omp_last_event_type == c_last_read) {
+     // Combined dependency list: current in_deps followed by m_out_deps.
+     in_deps_amount_save = in_deps_amount;
+     in_deps_save = in_deps;
+     in_deps_amount += m_out_deps_total;
+     if (in_deps_amount > 0) {
+         in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
+         if (in_deps == NULL)
+             LIBOFFLOAD_ERROR(c_malloc);
+         memcpy(in_deps, in_deps_save,
+                in_deps_amount_save * sizeof(COIEVENT));
+         // in_deps is a COIEVENT*, so the offset is counted in elements.
+         memcpy(in_deps + in_deps_amount_save, m_out_deps,
+                m_out_deps_total * sizeof(COIEVENT));
+     }
+ }
+ // On the first run, skip variables with a preallocated target pointer:
+ // the pointer value becomes available only after scatter_copyout_data.
+ if (first_run && m_vars[i].alloc_if && m_vars[i].flags.preallocated) {
+ m_preallocated_alloc = true;
+ // need one more call to OffloadDescriptor::receive_pointer_data
+ if (m_vars[i].direction.out) {
+ m_out_with_preallocated = true;
+ }
+ continue;
+ }
switch (m_vars[i].type.src) {
case c_data_ptr_array:
break;
@@ -3098,7 +4160,8 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
COIEVENT *event =
(is_async ||
m_in_deps_total > 0 ||
- m_vars[i].size >= __offload_use_async_buffer_read) ?
+ (should_use_async_buffer_read &&
+ m_vars[i].size >= __offload_use_async_buffer_read)) ?
&m_out_deps[m_out_deps_total++] : 0;
PtrData *ptr_data = NULL;
COIBUFFER dst_buf = NULL; // buffer at host
@@ -3127,8 +4190,9 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
- recieve_noncontiguous_pointer_data(
- i, base, dst_buf, event);
+ receive_noncontiguous_pointer_data(
+ i, dst_buf, event, received_data,
+ in_deps_amount, in_deps);
}
else if (dst_buf != 0) {
res = COI::BufferCopy(
@@ -3139,8 +4203,8 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
m_vars[i].offset + m_vars[i].disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3158,8 +4222,8 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3169,7 +4233,7 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
report_coi_error(c_buf_read, res);
}
}
- ptr_received += m_vars[i].size;
+ ptr_received += received_data;
}
break;
@@ -3186,7 +4250,8 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
COIEVENT *event =
(is_async ||
m_in_deps_total > 0 ||
- m_vars[i].size >= __offload_use_async_buffer_read) ?
+ (should_use_async_buffer_read &&
+ m_vars[i].size >= __offload_use_async_buffer_read)) ?
&m_out_deps[m_out_deps_total++] : 0;
uint64_t dst_offset = 0;
@@ -3241,8 +4306,10 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
if (m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
- recieve_noncontiguous_pointer_data(
- i, base, dst_buf, event);
+ receive_noncontiguous_pointer_data(
+ i, dst_buf, event, received_data,
+ in_deps_amount,
+ in_deps);
}
else if (dst_buf != 0) {
res = COI::BufferCopy(
@@ -3250,12 +4317,11 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
m_vars_extra[i].src_data->mic_buf,
dst_offset,
m_vars[i].offset + m_vars[i].disp +
- m_vars[i].mic_offset -
- m_vars_extra[i].src_data->alloc_disp,
+ m_vars[i].mic_offset,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3269,13 +4335,12 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
res = COI::BufferRead(
m_vars_extra[i].src_data->mic_buf,
m_vars[i].offset + m_vars[i].disp +
- m_vars[i].mic_offset -
- m_vars_extra[i].src_data->alloc_disp,
+ m_vars[i].mic_offset,
base + dst_offset,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- m_in_deps_total,
- m_in_deps_total > 0 ? m_in_deps : 0,
+ in_deps_amount,
+ in_deps,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
@@ -3285,7 +4350,7 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
report_coi_error(c_buf_read, res);
}
}
- ptr_received += m_vars[i].size;
+ ptr_received += received_data;
}
break;
}
@@ -3294,6 +4359,11 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
break;
}
+ if (m_vars_extra[i].omp_last_event_type == c_last_read) {
+ in_deps_amount = in_deps_amount_save;
+ in_deps = in_deps_save;
+ register_omp_event_call_back(&m_out_deps[m_out_deps_total - 1], info);
+ }
// destroy buffers for obsolete stacks
if (m_destroy_stack.size() != 0) {
for (PtrDataList::iterator it = m_destroy_stack.begin();
@@ -3312,8 +4382,13 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
m_vars[i].type.src == c_void_ptr ||
m_vars[i].type.src == c_cean_var)) {
AutoData *auto_data = m_vars_extra[i].auto_data;
- if (auto_data != 0 && auto_data->remove_reference() == 0) {
- m_device.remove_auto_data(auto_data->cpu_addr.start());
+ if (auto_data != 0) {
+ if (m_vars[i].flags.always_delete) {
+ auto_data->nullify_reference();
+ }
+ else if(auto_data->remove_reference() == 0) {
+ m_device.remove_auto_data(auto_data->cpu_addr.start());
+ }
}
}
@@ -3338,7 +4413,12 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
ptr_data->cpu_addr.start());
// remove association from map
- m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+ if (m_vars[i].flags.targetptr) {
+ m_device.remove_targetptr_data(ptr_data->cpu_addr.start());
+ }
+ else {
+ m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+ }
}
}
else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
@@ -3357,7 +4437,12 @@ bool OffloadDescriptor::receive_pointer_data(bool is_async)
ptr_data->cpu_addr.start());
// remove association from map
- m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+ if (m_vars[i].flags.targetptr) {
+ m_device.remove_targetptr_data(ptr_data->cpu_addr.start());
+ }
+ else {
+ m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+ }
}
}
}
@@ -3416,6 +4501,60 @@ bool OffloadDescriptor::scatter_copyout_data()
m_out.init_buffer(data, m_out_datalen);
for (int i = 0; i < m_vars_total; i++) {
+ bool src_is_for_mic = (m_vars[i].direction.out ||
+ m_vars[i].into == NULL);
+
+ if (m_vars[i].type.src != c_data_ptr_array &&
+ m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
+ PtrData *ptr_data;
+ void *ptr_value;
+ void ** cpu_ptr = src_is_for_mic ?
+ reinterpret_cast<void**>(m_vars[i].ptr) :
+ reinterpret_cast<void**>(m_vars[i].into);
+ void* alloc_base = NULL;
+ int64_t alloc_disp = 0;
+ int64_t alloc_size;
+ if (m_vars_extra[i].alloc != NULL) {
+ // array descriptor
+ const Arr_Desc *ap =
+ static_cast<const Arr_Desc*>(m_vars_extra[i].alloc);
+
+ __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
+
+ alloc_base = reinterpret_cast<void*>(ap->base);
+ }
+
+ // get pointer to target memory
+ m_out.receive_data(&ptr_value, sizeof(void*));
+
+ // add new entry
+ if (!alloc_ptr_data(
+ ptr_data,
+ ptr_value,
+ (alloc_base != NULL) ?
+ alloc_disp : m_vars[i].disp,
+ (alloc_base != NULL) ?
+ alloc_size : m_vars[i].size,
+ alloc_disp,
+ 0,
+ m_vars[i].flags.targetptr,
+ m_vars[i].flags.preallocated,
+ m_vars[i].flags.pin)) {
+ return false;
+ }
+
+ ptr_data->add_reference();
+ *cpu_ptr = ptr_value;
+ if (src_is_for_mic) {
+ m_vars_extra[i].src_data = ptr_data;
+ }
+ else {
+ m_vars_extra[i].dst_data = ptr_data;
+ }
+ m_vars[i].offset = (char*) ptr_value -
+ (char*) ptr_data->cpu_addr.start();
+ }
+
switch (m_vars[i].type.src) {
case c_data_ptr_array:
break;
@@ -3478,8 +4617,8 @@ bool OffloadDescriptor::scatter_copyout_data()
return true;
}
-void get_arr_desc_numbers(
- const arr_desc *ap,
+static void get_arr_desc_numbers(
+ const Arr_Desc *ap,
int64_t el_size,
int64_t &offset,
int64_t &size,
@@ -3500,33 +4639,12 @@ void get_arr_desc_numbers(
}
}
-arr_desc * make_arr_desc(
- void* ptr_val,
- int64_t extent_start_val,
- int64_t extent_elements_val,
- int64_t size
-)
-{
- arr_desc *res;
- res = (arr_desc *)malloc(sizeof(arr_desc));
- if (res == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- res->base = reinterpret_cast<int64_t>(ptr_val);
- res->rank = 1;
- res->dim[0].size = size;
- res->dim[0].lindex = 0;
- res->dim[0].lower = extent_start_val;
- res->dim[0].upper = extent_elements_val + extent_start_val - 1;
- res->dim[0].stride = 1;
- return res;
-}
-
bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
{
int pointers_number;
int tmp_val;
int new_index = m_vars_total;
- const arr_desc *ap;
+ const Arr_Desc *ap;
const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
int flags = vd3->array_fields;
bool src_is_for_mic = (m_vars[i].direction.out ||
@@ -3545,14 +4663,16 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
ReadArrElements<int64_t> alloc_elem;
- ap = static_cast<const arr_desc*>(vd3->ptr_array);
+ ap = static_cast<const Arr_Desc*>(vd3->ptr_array);
- // "pointers_number" for total number of transfered pointers.
+ // "pointers_number" for total number of transferred pointers.
// For each of them we create new var_desc and put it at the bottom
// of the var_desc's array
get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
pointers_number, ptr.ranges);
- ptr.base = reinterpret_cast<char*>(ap->base);
+ ptr.base = (m_vars[i].flags.is_pointer) ?
+ *(reinterpret_cast<char**>(ap->base)) :
+ reinterpret_cast<char*>(ap->base);
// 2. prepare memory for new var_descs
m_vars_total += pointers_number;
@@ -3575,7 +4695,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// 3. Prepare for reading new var_desc's fields
// EXTENT START
if ((flags & (1<<flag_extent_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->extent_start);
+ ap = static_cast<const Arr_Desc*>(vd3->extent_start);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
ext_start.size, tmp_val, ext_start.ranges);
ext_start.base = reinterpret_cast<char*>(ap->base);
@@ -3595,7 +4715,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// EXTENT ELEMENTS NUMBER
if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->extent_elements);
+ ap = static_cast<const Arr_Desc*>(vd3->extent_elements);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
ext_elements.offset, ext_elements.size,
tmp_val, ext_elements.ranges);
@@ -3616,7 +4736,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// ALLOC_IF
if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_if_array);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
alloc_if.size, tmp_val, alloc_if.ranges);
alloc_if.base = reinterpret_cast<char*>(ap->base);
@@ -3628,12 +4748,12 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
}
}
else {
- alloc_if.val = m_vars[i].count;
+ alloc_if.val = m_vars[i].alloc_if;
}
// FREE_IF
if ((flags & (1<<flag_free_if_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->free_if_array);
+ ap = static_cast<const Arr_Desc*>(vd3->free_if_array);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
free_if.size, tmp_val, free_if.ranges);
free_if.base = reinterpret_cast<char*>(ap->base);
@@ -3645,13 +4765,13 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
}
}
else {
- free_if.val = m_vars[i].count;
+ free_if.val = m_vars[i].free_if;
}
// ALIGN
if ((flags & (1<<flag_align_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->align_array);
+ ap = static_cast<const Arr_Desc*>(vd3->align_array);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
align.size, tmp_val, align.ranges);
align.base = reinterpret_cast<char*>(ap->base);
@@ -3669,7 +4789,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// 3.1 INTO
if (m_vars[i].into) {
- ap = static_cast<const arr_desc*>(m_vars[i].into);
+ ap = static_cast<const Arr_Desc*>(m_vars[i].into);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
into.size, tmp_val, into.ranges);
into.base = reinterpret_cast<char*>(ap->base);
@@ -3683,7 +4803,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// 3.2 INTO_START
if ((flags & (1<<flag_into_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->into_start);
+ ap = static_cast<const Arr_Desc*>(vd3->into_start);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
into_start.size, tmp_val, into_start.ranges);
into_start.base = reinterpret_cast<char*>(ap->base);
@@ -3704,7 +4824,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// 3.3 INTO_ELEMENTS
if ((flags & (1<<flag_into_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->into_elements);
+ ap = static_cast<const Arr_Desc*>(vd3->into_elements);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
into_elem.size, tmp_val, into_elem.ranges);
into_elem.base = reinterpret_cast<char*>(ap->base);
@@ -3725,7 +4845,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// alloc_start
if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_start);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_start);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
alloc_start.offset, alloc_start.size, tmp_val,
alloc_start.ranges);
@@ -3747,7 +4867,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
// alloc_elem
if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
- ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+ ap = static_cast<const Arr_Desc*>(vd3->alloc_elements);
get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
alloc_elem.size, tmp_val, alloc_elem.ranges);
alloc_elem.base = reinterpret_cast<char*>(ap->base);
@@ -3846,6 +4966,9 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
m_vars[new_index + k].offset = 0;
m_vars[new_index + k].size = m_vars[i].size;
+ m_vars[new_index + k].flags.targetptr = m_vars[i].flags.targetptr;
+ m_vars[new_index + k].flags.preallocated =
+ m_vars[i].flags.preallocated;
if (ext_start.val == 0) {
m_vars[new_index + k].count = ext_elements.val;
@@ -3901,6 +5024,7 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
m_vars[new_index + k].type.src = type_src;
m_vars[new_index + k].type.dst = type_dst;
+ m_vars_extra[new_index + k].alloc = m_vars[new_index + k].alloc;
m_vars_extra[new_index + k].is_arr_ptr_el = 1;
m_vars_extra[new_index + k].ptr_arr_offset =
src_is_for_mic ? ptr.offset : into.offset;
@@ -3912,6 +5036,52 @@ bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
return true;
}
+// Collects the events that the first node of this offload has to wait for
+// when the offload is issued through the stream "m_stream".
+//   in_deps_amount (out) - number of dependencies
+//   in_deps        (out) - array of dependencies
+// Both arguments are left untouched when there is nothing to wait for.
+//
+// Dependency scheme for streams:
+// ------------------------------
+// Each offload forms a DAG of three nodes: in-transfers, run function and
+// out-transfers.  Every node has in-dependencies and out-dependencies, and
+// the out-dependencies of one node form the in-dependencies of the next.
+// Without streams the in-dependencies of the first node (in-transfers) are
+// NULL.  With a stream they are the out-dependencies of the last node of the
+// previous offload issued through the same stream, which is how the DAGs of
+// two consecutive offloads on one stream get connected.
+void OffloadDescriptor::get_stream_in_dependencies(
+    uint32_t &in_deps_amount,
+    COIEVENT* &in_deps
+)
+{
+    // nothing to chain to unless the offload is bound to a real stream
+    if (m_stream == no_stream || m_stream == 0) {
+        return;
+    }
+
+    Stream *owner = Stream::find_stream(m_stream, false);
+    if (owner == 0) {
+        LIBOFFLOAD_ERROR(c_offload_no_stream,
+                         m_device.get_logical_index());
+        LIBOFFLOAD_ABORT;
+    }
+
+    // the first offload in the stream has no predecessor
+    OffloadDescriptor *previous = owner->get_last_offload();
+    if (previous == 0) {
+        return;
+    }
+
+    if (previous->m_out_deps_total != 0) {
+        // the previous offload has out-transfers
+        in_deps_amount = previous->m_out_deps_total;
+        in_deps = previous->m_out_deps;
+    }
+    else if (previous->m_in_deps_total != 0) {
+        // the previous offload only sent pointer data and/or ran the
+        // function, so it has no out-transfers
+        in_deps_amount = previous->m_in_deps_total;
+        in_deps = previous->m_in_deps;
+    }
+}
+
static void __offload_fini_library(void)
{
OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
@@ -3945,7 +5115,6 @@ static void __offload_init_library_once(void)
COIRESULT res;
uint32_t num_devices;
std::bitset<MIC_ENGINES_MAX> devices;
-
prefix = report_get_message_str(c_report_host);
// initialize trace
@@ -3989,7 +5158,7 @@ static void __offload_init_library_once(void)
}
// get number of devices installed in the system
- res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
+ res = COI::EngineGetCount(COI_ISA_MIC, &num_devices);
if (res != COI_SUCCESS) {
return;
}
@@ -4032,7 +5201,7 @@ static void __offload_init_library_once(void)
// use all available devices
for (int i = 0; i < num_devices; i++) {
COIENGINE engine;
- res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
+ res = COI::EngineGetHandle(COI_ISA_MIC, i, &engine);
if (res == COI_SUCCESS) {
devices.set(i);
}
@@ -4055,12 +5224,64 @@ static void __offload_init_library_once(void)
}
}
+ // Get DMA channel count to pass it to COI
+ env_var = getenv("OFFLOAD_DMA_CHANNEL_COUNT");
+ if (env_var != 0) {
+ int64_t new_val;
+ if (__offload_parse_int_string(env_var, new_val)) {
+ mic_dma_channel_count = new_val;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+ "OFFLOAD_DMA_CHANNEL_COUNT");
+ }
+ }
+
+    // Set COI_HOST_THREAD_AFFINITY if OFFLOAD_HOST_THREAD_AFFINITY is set.
+    // Use putenv instead of setenv as Windows has no setenv.
+    // Note: putenv requires its argument can't be freed or modified.
+    // So no free after call to putenv or elsewhere.
+    env_var = getenv("OFFLOAD_HOST_THREAD_AFFINITY");
+    if (env_var != 0) {
+        // sizeof of the literal already counts the terminating NUL, so the
+        // buffer holds "COI_HOST_THREAD_AFFINITY=" + value + NUL exactly.
+        // The value length must come from strlen: sizeof(env_var) is only
+        // the size of the pointer and would under-allocate.
+        const size_t new_env_len =
+            sizeof("COI_HOST_THREAD_AFFINITY=") + strlen(env_var);
+        char * new_env_var = (char*) malloc(new_env_len);
+        if (new_env_var == NULL) {
+            LIBOFFLOAD_ERROR(c_malloc);
+        }
+        else {
+            sprintf(new_env_var, "COI_HOST_THREAD_AFFINITY=%s", env_var);
+            putenv(new_env_var);
+        }
+    }
+
// library search path for device binaries
env_var = getenv("MIC_LD_LIBRARY_PATH");
if (env_var != 0) {
mic_library_path = strdup(env_var);
}
+
+ // find target executable to be used if main application is not an
+ // offload build application.
+ const char *base_name = "offload_main";
+ if (mic_library_path != 0) {
+ char *buf = strdup(mic_library_path);
+ char *try_name = (char*) alloca(strlen(mic_library_path) +
+ strlen(base_name) + 2);
+ char *dir, *ptr;
+
+ for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
+ dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
+ // compose a full path
+ sprintf(try_name, "%s/%s", dir, base_name);
+
+ // check if such file exists
+ struct stat st;
+ if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
+ mic_device_main = strdup(try_name);
+ break;
+ }
+ }
+
+ free(buf);
+ }
+
// memory size reserved for COI buffers
env_var = getenv("MIC_BUFFERSIZE");
if (env_var != 0) {
@@ -4073,6 +5294,30 @@ static void __offload_init_library_once(void)
}
}
+ // memory size reserved for 4K pages for COI buffers
+ env_var = getenv("MIC_4K_BUFFER_RESERVE_SIZE");
+ if (env_var != 0) {
+ uint64_t new_size;
+ if (__offload_parse_size_string(env_var, new_size)) {
+ mic_4k_buffer_size = new_size;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_4K_BUFFER_RESERVE_SIZE");
+ }
+ }
+
+ // memory size reserved for 2M pages for COI buffers
+ env_var = getenv("MIC_2M_BUFFER_RESERVE_SIZE");
+ if (env_var != 0) {
+ uint64_t new_size;
+ if (__offload_parse_size_string(env_var, new_size)) {
+ mic_2m_buffer_size = new_size;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_2M_BUFFER_RESERVE_SIZE");
+ }
+ }
+
// determine stacksize for the pipeline on the device
env_var = getenv("MIC_STACKSIZE");
if (env_var != 0 && *env_var != '\0') {
@@ -4170,11 +5415,9 @@ static void __offload_init_library_once(void)
else if (strcmp(env_var, "on_offload_all") == 0) {
__offload_init_type = c_init_on_offload_all;
}
-#ifndef TARGET_WINNT
else if (strcmp(env_var, "on_start") == 0) {
__offload_init_type = c_init_on_start;
}
-#endif // TARGET_WINNT
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
}
@@ -4206,6 +5449,32 @@ static void __offload_init_library_once(void)
}
}
+ // parallel copy of offload_transfer
+ env_var = getenv(parallel_copy_envname);
+ if (env_var != 0 && *env_var != '\0') {
+ int64_t new_val;
+ if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
+ __offload_parallel_copy = new_val;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+ parallel_copy_envname);
+ }
+ }
+
+ // use COI interface for noncontiguous arrays transfer
+ env_var = getenv(use_coi_noncontiguous_transfer_envname);
+ if (env_var != 0 && *env_var != '\0') {
+ uint64_t new_size;
+ if (__offload_parse_size_string(env_var, new_size)) {
+ __offload_use_coi_noncontiguous_transfer = new_size;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+ use_coi_noncontiguous_transfer_envname);
+ }
+ }
+
// init ORSL
ORSL::init();
}
@@ -4242,7 +5511,20 @@ extern int __offload_init_library(void)
return is_available;
}
-extern "C" void __offload_register_image(const void *target_image)
+// Reports whether the packed target image holds an executable (ET_EXEC)
+// rather than any other ELF type such as a shared library.
+// The image data begins with a NUL-terminated name; the ELF binary follows
+// immediately after that terminator.
+extern "C" bool __offload_target_image_is_executable(const void *target_image)
+{
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+
+    // decode image: skip the name and its terminating NUL to reach the binary
+    const void *data = image->data + strlen(image->data) + 1;
+
+    // determine image type from the ELF header
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    return (hdr->e_type == ET_EXEC);
+}
+
+extern "C" bool __offload_register_image(const void *target_image)
{
const struct Image *image = static_cast<const struct Image*>(target_image);
@@ -4250,8 +5532,32 @@ extern "C" void __offload_register_image(const void *target_image)
const char *name = image->data;
const void *data = image->data + strlen(image->data) + 1;
uint64_t size = image->size;
- const char *origin = 0;
+ char *origin = (char *) malloc(strlen(image->data) + 1);
uint64_t offset = 0;
+ const char *host_name = image->data;
+ int i;
+
+ if (origin == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+
+ // The origin name is the name of the file on the host
+ // this is used by Vtune, since it is a fat binary we
+ // use the host file name of the fat binary.
+ // Driver prepends the host file name ending with "?"
+ // to the image->data name so need to extract the string
+ i = 0;
+ while (*host_name != '\0' && *host_name != '?') {
+ origin[i] = *host_name;
+ host_name++;
+ i++;
+ }
+ origin[i] = '\0';
+ // Implies the host name does not exist which really should
+ // not occur. Allow this since only consumer is Vtune.
+ if ((i == 0) || (*host_name != '?')) {
+ free(origin);
+ origin = 0;
+ }
// our actions depend on the image type
const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
@@ -4279,19 +5585,31 @@ extern "C" void __offload_register_image(const void *target_image)
}
}
}
- break;
+ return mic_engines_total > 0;
case ET_DYN:
- // Registration code for libraries is called from the DllMain
- // context (on windows) and thus we cannot do anything usefull
- // here. So we just add it to the list of pending libraries for
- // the later use.
- __target_libs_lock.lock();
- __target_libs = true;
- __target_libs_list.push_back(TargetImage(name, data, size,
- origin, offset));
- __target_libs_lock.unlock();
- break;
+ {
+ char *fullname = origin;
+ // We add the library to a list of pending libraries
+ __target_libs_lock.lock();
+ __target_libs = true;
+ __target_libs_list.push_back(
+ TargetImage(name, data, size, fullname, offset));
+ __target_libs_lock.unlock();
+ // If __target_exe is set, then main has started running
+ // If not main, then we can't do anything useful here
+ // because this registration code is called from DllMain
+ // context (on windows).
+ if (__target_exe != 0) {
+ // There is no need to delay loading the library
+ if (!__offload_init_library()) {
+ // Couldn't validate library as a fat offload library
+ LIBOFFLOAD_ERROR(c_unknown_binary_type);
+ exit(1);
+ }
+ }
+ return true;
+ }
default:
// something is definitely wrong, issue an error and exit
@@ -4330,6 +5648,12 @@ extern "C" void __offload_unregister_image(const void *target_image)
__offload_fini_library();
}
+ else if (hdr->e_type == ET_DYN) {
+ for (int i = 0; i < mic_engines_total; i++) {
+ mic_engines[i].unload_library(data, name);
+ }
+
+ }
}
// Runtime trace interface for user programs
@@ -4362,19 +5686,24 @@ int _Offload_signaled(int index, void *signal)
__offload_init_library();
// check index value
- if (index < 0 || mic_engines_total <= 0) {
+ if (index < 0) {
LIBOFFLOAD_ERROR(c_offload_signaled1, index);
LIBOFFLOAD_ABORT;
}
+ index %= mic_engines_total;
+
// find associated async task
OffloadDescriptor *task =
- mic_engines[index % mic_engines_total].find_signal(signal, false);
+ mic_engines[index].find_signal(signal, false);
if (task == 0) {
LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
LIBOFFLOAD_ABORT;
}
-
+ // if signal is removed by wait completing
+ else if (task == SIGNAL_IS_REMOVED) {
+ return (true);
+ }
return task->is_signaled();
}
@@ -4386,6 +5715,153 @@ void _Offload_report(int val)
}
}
+// Looks up the MIC memory associated with a CPU address on the given target.
+//   target               - device number; reduced modulo the engine count
+//   cpu_addr             - CPU address to look up in the pointer table
+//   cpu_base_addr  (out) - start of the associated CPU range
+//   buf_length     (out) - CPU range length minus the allocation displacement
+//   mic_addr       (out) - MIC address plus the MIC offset of the entry
+//   mic_buf_start_offset (out) - allocation displacement of the entry
+//   is_static      (out) - is_static flag of the entry
+// Returns 0 when no association exists or the sink address cannot be
+// obtained (outputs are then left untouched); otherwise 1 for static data
+// or the value of get_reference() for the entry (presumably its reference
+// count - confirm against PtrData).
+int _Offload_find_associated_mic_memory(
+    int target,
+    const void* cpu_addr,
+    void** cpu_base_addr,
+    uint64_t* buf_length,
+    void** mic_addr,
+    uint64_t* mic_buf_start_offset,
+    int* is_static
+)
+{
+    __offload_init_library();
+
+    // check target value; without any engine the modulo below would divide
+    // by zero, so that case is rejected here as well
+    if (target < 0 || mic_engines_total == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, target);
+        LIBOFFLOAD_ABORT;
+    }
+    target %= mic_engines_total;
+
+    // find existing association in pointer table
+    PtrData* ptr_data = mic_engines[target].find_ptr_data(cpu_addr);
+    if (ptr_data == 0) {
+        OFFLOAD_TRACE(3, "Association does not exist\n");
+        return 0;
+    }
+
+    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
+                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                  ptr_data->is_static);
+
+    // the MIC address is resolved on first use and cached in the entry
+    if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
+        COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
+                                                  &ptr_data->mic_addr);
+        if (res != COI_SUCCESS) {
+            return 0;
+        }
+    }
+    *cpu_base_addr = const_cast<void *>(ptr_data->cpu_addr.start());
+    *buf_length = ptr_data->cpu_addr.length() - ptr_data->alloc_disp;
+    *mic_addr = (void *)(ptr_data->mic_addr + ptr_data->mic_offset);
+    *mic_buf_start_offset = ptr_data->alloc_disp;
+    *is_static = ptr_data->is_static;
+    return ptr_data->is_static ? 1 : ptr_data->get_reference();
+}
+
+// Creates a new stream on a MIC device together with its pipeline.
+// The device number is reduced modulo the number of engines.
+// Returns the handle of the new stream, or 0 if the stream could not be
+// created.
+_Offload_stream _Offload_stream_create(
+    int device,           // MIC device number
+    int number_of_cpus    // Cores allocated to the stream
+    )
+{
+    __offload_init_library();
+
+    // check target value; without any engine the modulo below would divide
+    // by zero, so that case is rejected here as well
+    if (device < 0 || mic_engines_total == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+        LIBOFFLOAD_ABORT;
+    }
+    device %= mic_engines_total;
+
+    // Create new stream and get its handle
+    _Offload_stream handle = Stream::add_stream(device, number_of_cpus);
+    if (handle == 0) {
+        OFFLOAD_TRACE(3, "Can't create stream\n");
+        return 0;
+    }
+
+    // create pipeline associated with the new stream
+    mic_engines[device].get_pipeline(handle);
+
+    return(handle);
+}
+
+// Destroys a stream on a MIC device.
+// The device number is reduced modulo the number of engines.
+// Always returns true once the engine has been asked to destroy the stream.
+int _Offload_stream_destroy(
+    int device,             // MIC device number
+    _Offload_stream handle  // stream to destroy
+    )
+{
+    __offload_init_library();
+
+    // check target value; without any engine the modulo below would divide
+    // by zero, so that case is rejected here as well
+    if (device < 0 || mic_engines_total == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+        LIBOFFLOAD_ABORT;
+    }
+    device %= mic_engines_total;
+
+    mic_engines[device].stream_destroy(handle);
+
+    return(true);
+}
+
+// Reports whether the offloads issued through a stream have completed.
+//   device  - MIC device number; reduced modulo the number of engines
+//   handler - stream to query; 0 means "all streams"
+// Returns true when the last offload of the stream (or of every stream, for
+// a zero handler) is signaled or has already been waited for, false
+// otherwise.  An unknown non-zero handler is reported as an error.
+int _Offload_stream_completed(int device, _Offload_stream handler)
+{
+    __offload_init_library();
+
+    // check index value; without any engine the modulo below would divide
+    // by zero, so that case is rejected here as well
+    if (device < 0 || mic_engines_total == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+        LIBOFFLOAD_ABORT;
+    }
+
+    device %= mic_engines_total;
+
+    if (handler != 0) {
+        // get stream
+        Stream * stream = Stream::find_stream(handler, false);
+
+        // the stream was not created or was destroyed
+        if (!stream) {
+            LIBOFFLOAD_ERROR(c_offload_no_stream, device);
+            LIBOFFLOAD_ABORT;
+        }
+
+        // find associated async task
+        OffloadDescriptor *task = stream->get_last_offload();
+
+        // offload was completed by offload_wait pragma or wait clause
+        if (task == 0) {
+            return(true);
+        }
+        return task->is_signaled();
+    }
+    // zero handler is for all streams at the device
+    // NOTE(review): the loop below walks every registered stream and does
+    // not filter by "device" - confirm whether that is intended.
+    else {
+        StreamMap stream_map = Stream::all_streams;
+        for (StreamMap::iterator it = stream_map.begin();
+             it != stream_map.end(); ++it) {
+            // find associated async task
+            OffloadDescriptor *task = it->second->get_last_offload();
+
+            // offload was completed by offload_wait pragma or wait clause;
+            // this stream is done, but the remaining ones still have to be
+            // checked, so keep going instead of returning early
+            if (task == 0) {
+                continue;
+            }
+            // if even one stream is not completed result is false
+            if (!task->is_signaled()) {
+                return false;
+            }
+        }
+        // no uncompleted streams
+        return true;
+    }
+}
+
// IDB support
int __dbg_is_attached = 0;
int __dbg_target_id = -1;
diff --git a/liboffloadmic/runtime/offload_host.h b/liboffloadmic/runtime/offload_host.h
index 2212dec51494..afd5c9936ca1 100644
--- a/liboffloadmic/runtime/offload_host.h
+++ b/liboffloadmic/runtime/offload_host.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -46,8 +46,12 @@
#include "coi/coi_client.h"
// MIC engines.
-extern Engine* mic_engines;
-extern uint32_t mic_engines_total;
+DLL_LOCAL extern Engine* mic_engines;
+DLL_LOCAL extern uint32_t mic_engines_total;
+
+// DMA channel count used by COI and set via
+// OFFLOAD_DMA_CHANNEL_COUNT environment variable
+DLL_LOCAL extern uint32_t mic_dma_channel_count;
//! The target image is packed as follows.
/*! 1. 8 bytes containing the size of the target binary */
@@ -64,6 +68,13 @@ struct Image {
class OffloadDescriptor
{
public:
+ enum OmpAsyncLastEventType {
+ c_last_not, // not last event
+ c_last_write, // the last event that is write
+ c_last_read, // the last event that is read
+ c_last_runfunc // the last event that is runfunction
+ };
+
OffloadDescriptor(
int index,
_Offload_status *status,
@@ -71,7 +82,7 @@ public:
bool is_openmp,
OffloadHostTimerData * timer_data
) :
- m_device(mic_engines[index % mic_engines_total]),
+ m_device(mic_engines[index == -1 ? 0 : index % mic_engines_total]),
m_is_mandatory(is_mandatory),
m_is_openmp(is_openmp),
m_inout_buf(0),
@@ -79,13 +90,22 @@ public:
m_func_desc_size(0),
m_in_deps(0),
m_in_deps_total(0),
+ m_in_deps_allocated(0),
m_out_deps(0),
m_out_deps_total(0),
+ m_out_deps_allocated(0),
m_vars(0),
m_vars_extra(0),
m_status(status),
- m_timer_data(timer_data)
- {}
+ m_timer_data(timer_data),
+ m_out_with_preallocated(false),
+ m_preallocated_alloc(false),
+ m_traceback_called(false),
+ m_stream(-1),
+ m_omp_async_last_event_type(c_last_not)
+ {
+ m_wait_all_devices = index == -1;
+ }
~OffloadDescriptor()
{
@@ -107,8 +127,10 @@ public:
bool offload(const char *name, bool is_empty,
VarDesc *vars, VarDesc2 *vars2, int vars_total,
const void **waits, int num_waits, const void **signal,
- int entry_id, const void *stack_addr);
- bool offload_finish();
+ int entry_id, const void *stack_addr,
+ OffloadFlags offload_flags);
+
+ bool offload_finish(bool is_traceback);
bool is_signaled();
@@ -116,36 +138,60 @@ public:
return m_timer_data;
}
+ void set_stream(_Offload_stream stream) {
+ m_stream = stream;
+ }
+
+ _Offload_stream get_stream() {
+ return(m_stream);
+ }
+
private:
- bool wait_dependencies(const void **waits, int num_waits);
+ bool offload_wrap(const char *name, bool is_empty,
+ VarDesc *vars, VarDesc2 *vars2, int vars_total,
+ const void **waits, int num_waits, const void **signal,
+ int entry_id, const void *stack_addr,
+ OffloadFlags offload_flags);
+ bool wait_dependencies(const void **waits, int num_waits,
+ _Offload_stream stream);
bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
int entry_id, const void *stack_addr);
bool setup_misc_data(const char *name);
- bool send_pointer_data(bool is_async);
+ bool send_pointer_data(bool is_async, void* info);
bool send_noncontiguous_pointer_data(
int i,
PtrData* src_buf,
PtrData* dst_buf,
- COIEVENT *event);
- bool recieve_noncontiguous_pointer_data(
+ COIEVENT *event,
+ uint64_t &sent_data,
+ uint32_t in_deps_amount,
+ COIEVENT *in_deps
+ );
+ bool receive_noncontiguous_pointer_data(
int i,
- char* src_data,
COIBUFFER dst_buf,
- COIEVENT *event);
+ COIEVENT *event,
+ uint64_t &received_data,
+ uint32_t in_deps_amount,
+ COIEVENT *in_deps
+ );
bool gather_copyin_data();
- bool compute();
+ bool compute(void *);
- bool receive_pointer_data(bool is_async);
+ bool receive_pointer_data(bool is_async, bool first_run, void * info);
bool scatter_copyout_data();
void cleanup();
bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
- int64_t length, bool error_does_not_exist = true);
+ int64_t length, bool is_targptr,
+ bool error_does_not_exist = true);
bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
- int64_t length, int64_t alloc_disp, int align);
+ int64_t length, int64_t alloc_disp, int align,
+ bool is_targptr, bool is_prealloc, bool pin);
+ bool create_preallocated_buffer(PtrData* ptr_data, void *base);
bool init_static_ptr_data(PtrData *ptr_data);
bool init_mic_address(PtrData *ptr_data);
bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
@@ -154,9 +200,15 @@ private:
bool gen_var_descs_for_pointer_array(int i);
+ void get_stream_in_dependencies(uint32_t &in_deps_amount,
+ COIEVENT* &in_deps);
+
void report_coi_error(error_types msg, COIRESULT res);
_Offload_result translate_coi_error(COIRESULT res) const;
-
+
+ void setup_omp_async_info();
+ void register_omp_event_call_back(const COIEVENT *event, const void *info);
+
private:
typedef std::list<COIBUFFER> BufferList;
@@ -167,10 +219,12 @@ private:
AutoData* auto_data;
int64_t cpu_disp;
int64_t cpu_offset;
+ void *alloc;
CeanReadRanges *read_rng_src;
CeanReadRanges *read_rng_dst;
int64_t ptr_arr_offset;
bool is_arr_ptr_el;
+ OmpAsyncLastEventType omp_last_event_type;
};
template<typename T> class ReadArrElements {
@@ -230,6 +284,9 @@ private:
// Engine
Engine& m_device;
+ // true for offload_wait target(mic) stream(0)
+ bool m_wait_all_devices;
+
// if true offload is mandatory
bool m_is_mandatory;
@@ -266,8 +323,13 @@ private:
// Dependencies
COIEVENT *m_in_deps;
uint32_t m_in_deps_total;
+ uint32_t m_in_deps_allocated;
COIEVENT *m_out_deps;
uint32_t m_out_deps_total;
+ uint32_t m_out_deps_allocated;
+
+ // Stream
+ _Offload_stream m_stream;
// Timer data
OffloadHostTimerData *m_timer_data;
@@ -279,6 +341,25 @@ private:
// a boolean value calculated in setup_descriptors. If true we need to do
// a run function on the target. Otherwise it may be optimized away.
bool m_need_runfunction;
+
+ // initialized value of m_need_runfunction;
+ // is used to recognize offload_transfer
+ bool m_initial_need_runfunction;
+
+ // a Boolean value set to true when OUT clauses with preallocated targetptr
+ // is encountered to indicate that call receive_pointer_data needs to be
+ // invoked again after call to scatter_copyout_data.
+ bool m_out_with_preallocated;
+
+ // a Boolean value set to true if an alloc_if(1) is used with preallocated
+ // targetptr to indicate the need to scatter_copyout_data even for
+ // async offload
+ bool m_preallocated_alloc;
+
+ // a Boolean value set to true if traceback routine is called
+ bool m_traceback_called;
+
+ OmpAsyncLastEventType m_omp_async_last_event_type;
};
// Initialization types for MIC
@@ -288,46 +369,60 @@ enum OffloadInitType {
c_init_on_offload_all // all devices before starting the first offload
};
+// Determines if MIC code is an executable or a shared library
+extern "C" bool __offload_target_image_is_executable(const void *target_image);
+
// Initializes library and registers specified offload image.
-extern "C" void __offload_register_image(const void* image);
+extern "C" bool __offload_register_image(const void* image);
extern "C" void __offload_unregister_image(const void* image);
// Initializes offload runtime library.
-extern int __offload_init_library(void);
+DLL_LOCAL extern int __offload_init_library(void);
// thread data for associating pipelines with threads
-extern pthread_key_t mic_thread_key;
+DLL_LOCAL extern pthread_key_t mic_thread_key;
+
+// location of offload_main executable
+// To be used if the main application has no offload and is not built
+// with -offload but dynamic library linked in has offload pragma
+DLL_LOCAL extern char* mic_device_main;
// Environment variables for devices
-extern MicEnvVar mic_env_vars;
+DLL_LOCAL extern MicEnvVar mic_env_vars;
// CPU frequency
-extern uint64_t cpu_frequency;
+DLL_LOCAL extern uint64_t cpu_frequency;
// LD_LIBRARY_PATH for MIC libraries
-extern char* mic_library_path;
+DLL_LOCAL extern char* mic_library_path;
// stack size for target
-extern uint32_t mic_stack_size;
+DLL_LOCAL extern uint32_t mic_stack_size;
// Preallocated memory size for buffers on MIC
-extern uint64_t mic_buffer_size;
+DLL_LOCAL extern uint64_t mic_buffer_size;
+
+// Preallocated 4K page memory size for buffers on MIC
+DLL_LOCAL extern uint64_t mic_4k_buffer_size;
+
+// Preallocated 2M page memory size for buffers on MIC
+DLL_LOCAL extern uint64_t mic_2m_buffer_size;
// Setting controlling inout proxy
-extern bool mic_proxy_io;
-extern char* mic_proxy_fs_root;
+DLL_LOCAL extern bool mic_proxy_io;
+DLL_LOCAL extern char* mic_proxy_fs_root;
// Threshold for creating buffers with large pages
-extern uint64_t __offload_use_2mb_buffers;
+DLL_LOCAL extern uint64_t __offload_use_2mb_buffers;
// offload initialization type
-extern OffloadInitType __offload_init_type;
+DLL_LOCAL extern OffloadInitType __offload_init_type;
// Device number to offload to when device is not explicitly specified.
-extern int __omp_device_num;
+DLL_LOCAL extern int __omp_device_num;
// target executable
-extern TargetImage* __target_exe;
+DLL_LOCAL extern TargetImage* __target_exe;
// IDB support
diff --git a/liboffloadmic/runtime/offload_iterator.h b/liboffloadmic/runtime/offload_iterator.h
new file mode 100644
index 000000000000..f5922b4e808b
--- /dev/null
+++ b/liboffloadmic/runtime/offload_iterator.h
@@ -0,0 +1,103 @@
+/*
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+/*! \file
+ \brief Iterator of Variable tables list used by the runtime library
+*/
+
+#ifndef OFFLOAD_ITERATOR_H_INCLUDED
+#define OFFLOAD_ITERATOR_H_INCLUDED
+
+#include <iterator>
+#include "offload_table.h"
+
+// The following class is for iteration over var table.
+// It was extracted and moved to this offload_iterator.h file from offload_table.h
+// to solve the problem with compiling with VS 2010. The problem was in incompatibility
+// of STL objects in VS 2010 with ones in later VS versions.
+
+// var table list iterator
+// Input iterator that walks every named entry of a linked list of variable
+// tables.  Inside one table, entries whose name is null are skipped and an
+// entry whose name equals reinterpret_cast<const char*>(-1) terminates the
+// table.  Dereferencing yields a pointer to the current entry.
+class Iterator : public std::iterator<std::input_iterator_tag,
+                                      VarTable::Entry> {
+public:
+    // End iterator: refers to no node and no entry.
+    Iterator() : m_node(0), m_entry(0) {}
+
+    // Position on the first named entry reachable from 'node'.
+    explicit Iterator(TableList<VarTable>::Node *node) {
+        new_node(node);
+    }
+
+    // Step to the next named entry, continuing with the following nodes
+    // once the current table is exhausted.  Does nothing on an end iterator.
+    Iterator& operator++() {
+        if (m_entry == 0) {
+            return *this;
+        }
+        m_entry = skip_unnamed(m_entry + 1);
+        if (is_table_end(m_entry)) {
+            new_node(m_node->next);
+        }
+        return *this;
+    }
+
+    // Two iterators are equal when they refer to the same entry.
+    bool operator==(const Iterator &other) const {
+        return m_entry == other.m_entry;
+    }
+
+    bool operator!=(const Iterator &other) const {
+        return !(m_entry == other.m_entry);
+    }
+
+    // Current entry (null for an end iterator).
+    const VarTable::Entry* operator*() const {
+        return m_entry;
+    }
+
+private:
+    // Advance past entries whose name is null.
+    static const VarTable::Entry* skip_unnamed(const VarTable::Entry *e) {
+        while (e->name == 0) {
+            ++e;
+        }
+        return e;
+    }
+
+    // True when 'e' is the end-of-table marker.
+    static bool is_table_end(const VarTable::Entry *e) {
+        return e->name == reinterpret_cast<const char*>(-1);
+    }
+
+    // Starting at 'node', find the first node whose table holds at least
+    // one named entry and position on it; otherwise become an end iterator.
+    void new_node(TableList<VarTable>::Node *node) {
+        m_entry = 0;
+        for (m_node = node; m_node != 0; m_node = m_node->next) {
+            const VarTable::Entry *first =
+                skip_unnamed(m_node->table.entries);
+            if (!is_table_end(first)) {
+                m_entry = first;
+                break;
+            }
+        }
+    }
+
+private:
+    TableList<VarTable>::Node *m_node;
+    const VarTable::Entry *m_entry;
+};
+
+#endif // OFFLOAD_ITERATOR_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_myo_host.cpp b/liboffloadmic/runtime/offload_myo_host.cpp
index 987d077957a1..621494906413 100644
--- a/liboffloadmic/runtime/offload_myo_host.cpp
+++ b/liboffloadmic/runtime/offload_myo_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,14 +28,15 @@
*/
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
#include "offload_myo_host.h"
#include <errno.h>
#include <malloc.h>
#include "offload_host.h"
-
-#if defined(LINUX) || defined(FREEBSD)
-#include <mm_malloc.h>
-#endif
+//#include "offload_util.h"
#define MYO_VERSION1 "MYO_1.0"
@@ -47,11 +48,7 @@ extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
#pragma weak __cilkrts_cilk_for_64
#endif // TARGET_WINNT
-#ifdef TARGET_WINNT
-#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
-#else // TARGET_WINNT
-#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
-#endif // TARGET_WINNT
+static void __offload_myoProcessDeferredTables();
class MyoWrapper {
public:
@@ -140,7 +137,7 @@ public:
CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
}
- MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const {
+ MyoiRFuncCallHandle RemoteCall(const char *func, void *args, int device) const {
OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
device);
return m_remote_call(func, args, device);
@@ -151,6 +148,73 @@ public:
CheckResult(__func__, m_get_result(handle));
}
+    // Ask the MYO library whether the post-lib-init feature is available.
+    // Returns false when the feature-query entry point is not set.
+    bool PostInitFuncSupported() const {
+        OFFLOAD_DEBUG_TRACE(4, "%s()\n", __func__);
+        if (m_feature_available == 0) {
+            return false;
+        }
+        return m_feature_available(MYO_FEATURE_POST_LIB_INIT) == MYO_SUCCESS;
+    }
+
+ // Sets m_vtable_arena; defined out of line after the myo_wrapper object.
+ void CreateVtableArena();
+
+ // Returns the arena handle stored in m_vtable_arena.
+ MyoArena GetVtableArena()const {
+ return m_vtable_arena;
+ }
+
+ // Traces the call, forwards to the resolved m_arena_create entry point
+ // and hands the returned status to CheckResult.
+ void ArenaCreate(
+ MyoOwnershipType ownership,
+ int consistency,
+ MyoArena* arena
+ ) const
+ {
+ OFFLOAD_DEBUG_TRACE(4, "%s(%d, %d, %p)\n",
+ __func__, ownership, consistency, arena);
+ CheckResult(__func__, m_arena_create(ownership, consistency, arena));
+ }
+
+ // Traces the call and returns whatever m_arena_aligned_malloc returns;
+ // the result is not checked here.
+ void* SharedAlignedArenaMalloc(
+ MyoArena arena,
+ size_t size,
+ size_t align
+ ) const
+ {
+ OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenamalloc,
+ "%s(%u, %lld, %lld)\n",
+ __func__, arena, size, align);
+ return m_arena_aligned_malloc(arena, size, align);
+ }
+
+ // Traces the call and returns whatever m_arena_aligned_free returns.
+ void* SharedAlignedArenaFree(
+ MyoArena arena,
+ void* ptr
+ ) const
+ {
+ OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenafree,
+ "%s(%u, %p)\n", __func__, arena, ptr);
+ return m_arena_aligned_free(arena, ptr);
+ }
+
+ // Traces the call, forwards to m_arena_acquire and hands the status to
+ // CheckResult.
+ void ArenaAcquire(
+ MyoArena arena
+ ) const
+ {
+ OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenaacquire,
+ "%s()\n", __func__);
+ CheckResult(__func__, m_arena_acquire(arena));
+ }
+
+ // Traces the call, forwards to m_arena_release and hands the status to
+ // CheckResult.
+ void ArenaRelease(
+ MyoArena arena
+ ) const
+ {
+ OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenarelease,
+ "%s()\n", __func__);
+ CheckResult(__func__, m_arena_release(arena));
+ }
+
private:
void CheckResult(const char *func, MyoError error) const {
if (error != MYO_SUCCESS) {
@@ -160,8 +224,10 @@ private:
}
private:
- void* m_lib_handle;
- bool m_is_available;
+ void* m_lib_handle;
+ bool m_is_available;
+ int m_post_init_func;
+ MyoArena m_vtable_arena;
// pointers to functions from myo library
MyoError (*m_lib_init)(void*, void*);
@@ -175,11 +241,18 @@ private:
MyoError (*m_host_var_table_propagate)(void*, int);
MyoError (*m_host_fptr_table_register)(void*, int, int);
MyoError (*m_remote_thunk_call)(void*, void*, int);
- MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int);
+ MyoiRFuncCallHandle (*m_remote_call)(const char*, void*, int);
MyoError (*m_get_result)(MyoiRFuncCallHandle);
+ MyoError (*m_arena_create)(MyoOwnershipType, int, MyoArena*);
+ void* (*m_arena_aligned_malloc)(MyoArena, size_t, size_t);
+ void* (*m_arena_aligned_free)(MyoArena, void*);
+ MyoError (*m_arena_acquire)(MyoArena);
+ MyoError (*m_arena_release)(MyoArena);
+ // Placeholder until MYO headers support enum type for feature
+ MyoError (*m_feature_available)(int feature);
};
-bool MyoWrapper::LoadLibrary(void)
+DLL_LOCAL bool MyoWrapper::LoadLibrary(void)
{
#ifndef TARGET_WINNT
const char *lib_name = "libmyo-client.so";
@@ -295,7 +368,7 @@ bool MyoWrapper::LoadLibrary(void)
return false;
}
- m_remote_call = (MyoiRFuncCallHandle (*)(char*, void*, int))
+ m_remote_call = (MyoiRFuncCallHandle (*)(const char*, void*, int))
DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1);
if (m_remote_call == 0) {
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
@@ -313,8 +386,66 @@ bool MyoWrapper::LoadLibrary(void)
return false;
}
+ m_arena_create = (MyoError (*)(MyoOwnershipType, int, MyoArena*))
+ DL_sym(m_lib_handle, "myoArenaCreate", MYO_VERSION1);
+ if (m_arena_create == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+ "myoArenaCreate");
+ UnloadLibrary();
+ return false;
+ }
+
+ m_arena_aligned_malloc = (void* (*)(MyoArena, size_t, size_t))
+ DL_sym(m_lib_handle, "myoArenaAlignedMalloc", MYO_VERSION1);
+ if (m_arena_aligned_malloc == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+ "myoArenaAlignedMalloc");
+ UnloadLibrary();
+ return false;
+ }
+
+ m_arena_aligned_free = (void* (*)(MyoArena, void*))
+ DL_sym(m_lib_handle, "myoArenaAlignedFree", MYO_VERSION1);
+ if (m_arena_aligned_free == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+ "myoArenaAlignedFree");
+ UnloadLibrary();
+ return false;
+ }
+
+    // Resolve the arena acquire entry point.  A failed lookup aborts the
+    // load, so the null check has to test the pointer assigned just above
+    // (m_arena_acquire) rather than m_acquire.
+    m_arena_acquire = (MyoError (*)(MyoArena))
+        DL_sym(m_lib_handle, "myoArenaAcquire", MYO_VERSION1);
+    if (m_arena_acquire == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoArenaAcquire");
+        UnloadLibrary();
+        return false;
+    }
+
+    // Same for the arena release entry point: test m_arena_release, not
+    // m_release.
+    m_arena_release = (MyoError (*)(MyoArena))
+        DL_sym(m_lib_handle, "myoArenaRelease", MYO_VERSION1);
+    if (m_arena_release == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoArenaRelease");
+        UnloadLibrary();
+        return false;
+    }
+
+ // Check for "feature-available" API added in MPSS 3.3.
+ // Not finding it is not an error.
+ m_feature_available = (MyoError (*)(int))
+ DL_sym(m_lib_handle, "myoiSupportsFeature", MYO_VERSION1);
+ if (m_feature_available == 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+ "myoiSupportsFeature");
+ }
+
OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
+ // Create arena if supported
+ CreateVtableArena();
+ OFFLOAD_DEBUG_TRACE(3, "Vtable arena created\n");
+
m_is_available = true;
return true;
@@ -323,6 +454,23 @@ bool MyoWrapper::LoadLibrary(void)
static bool myo_is_available;
static MyoWrapper myo_wrapper;
+// Set m_vtable_arena: a newly created arena when the MYO library reports
+// post-lib-init support, otherwise 0.
+void MyoWrapper::CreateVtableArena()
+{
+    // Check if this MYO supports arenas for vtables
+    if (!myo_wrapper.PostInitFuncSupported()) {
+        m_vtable_arena = 0;
+        return;
+    }
+
+    // The arena handle is written into memory obtained from SharedMalloc
+    // and then copied into the member.
+    MyoArena* arena_slot =
+        (MyoArena *)myo_wrapper.SharedMalloc(sizeof(MyoArena));
+    myo_wrapper.ArenaCreate(MYO_ARENA_OURS, MYO_NO_CONSISTENCY, arena_slot);
+    m_vtable_arena = *arena_slot;
+    OFFLOAD_DEBUG_TRACE(4, "created arena = %d\n", m_vtable_arena);
+}
+
struct MyoTable
{
MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len)
@@ -337,9 +485,11 @@ static MyoTableList __myo_table_list;
static mutex_t __myo_table_lock;
static bool __myo_tables = false;
-static void __offload_myo_shared_table_register(SharedTableEntry *entry);
-static void __offload_myo_shared_init_table_register(InitTableEntry* entry);
-static void __offload_myo_fptr_table_register(FptrTableEntry *entry);
+static void __offload_myo_shared_vtable_process(SharedTableEntry *entry);
+static void __offload_myo_shared_table_process(SharedTableEntry *entry);
+static void __offload_myo_shared_init_table_process(InitTableEntry* entry);
+static void __offload_myo_fptr_table_process(FptrTableEntry *entry);
+static void __offload_propagate_shared_vars();
static void __offload_myoLoadLibrary_once(void)
{
@@ -350,6 +500,7 @@ static void __offload_myoLoadLibrary_once(void)
static bool __offload_myoLoadLibrary(void)
{
+ OFFLOAD_DEBUG_TRACE(4, "__offload_myoLoadLibrary\n");
static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
__offload_run_once(&ctrl, __offload_myoLoadLibrary_once);
@@ -371,17 +522,71 @@ static void __offload_myoInit_once(void)
OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
COIEVENT events[MIC_ENGINES_MAX];
- MyoiUserParams params[MIC_ENGINES_MAX+1];
- // load target library to all devices
+ // One entry per device +
+ // A pair of entries for the Host postInit func +
+ // A pair of entries for the MIC postInit func +
+ // end marker
+ MyoiUserParams params[MIC_ENGINES_MAX+5];
+
+ // Load target library to all devices and
+ // create libinit parameters for all devices
for (int i = 0; i < mic_engines_total; i++) {
mic_engines[i].init_myo(&events[i]);
params[i].type = MYOI_USERPARAMS_DEVID;
params[i].nodeid = mic_engines[i].get_physical_index() + 1;
+ OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
+ i, params[i].type, params[i].nodeid);
}
- params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
+ // Check if V2 myoLibInit is available
+ if (myo_wrapper.PostInitFuncSupported()) {
+ // Set the host post libInit function indicator
+ params[mic_engines_total].type =
+ MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
+ params[mic_engines_total].nodeid =
+ MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE;
+ OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
+ mic_engines_total,
+ params[mic_engines_total].type, params[mic_engines_total].nodeid);
+
+ // Set the host post libInit host function address
+ ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
+ postLibInitHostFuncAddress =
+ (void (*)())&__offload_propagate_shared_vars;
+ OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n",
+ mic_engines_total+1,
+ ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+1]))->
+ postLibInitHostFuncAddress);
+
+ // Set the target post libInit function indicator
+ params[mic_engines_total+2].type =
+ MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
+ params[mic_engines_total+2].nodeid =
+ MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
+
+        // Set the target post libInit target function name
+        ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+3]))->
+            postLibInitRemoveFuncName = "--vtable_initializer--";
+        // Trace the slot that was just written (index +3); reading the name
+        // field of slot +1 would print that slot's contents as a string.
+        OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n",
+            mic_engines_total+3,
+            ((MyoiUserParamsPostLibInit*)(&params[mic_engines_total+3]))->
+                postLibInitRemoveFuncName);
+
+ params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG;
+ params[mic_engines_total+4].nodeid = 0;
+ OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
+ mic_engines_total+4,
+ params[mic_engines_total+4].type,
+ params[mic_engines_total+4].nodeid);
+ } else {
+ params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
+ params[mic_engines_total].nodeid = 0;
+ OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
+ mic_engines_total,
+ params[mic_engines_total].type, params[mic_engines_total].nodeid);
+ }
// initialize myo runtime on host
myo_wrapper.LibInit(params, 0);
@@ -395,6 +600,7 @@ static void __offload_myoInit_once(void)
}
myo_is_available = true;
+ OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available);
OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
}
@@ -404,12 +610,22 @@ static bool __offload_myoInit(void)
static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
__offload_run_once(&ctrl, __offload_myoInit_once);
- // register pending shared var tables
- if (myo_is_available && __myo_tables) {
+ // Check if using V1 myoLibInit
+ if (!myo_wrapper.PostInitFuncSupported()) {
+ __offload_propagate_shared_vars();
+ }
+
+ return myo_is_available;
+}
+
+static void __offload_propagate_shared_vars()
+{
+ // Propagate pending shared var tables
+ if (__myo_tables) {
mutex_locker_t locker(__myo_table_lock);
if (__myo_tables) {
- // Register tables with MYO so it can propagate to target.
+ // Hand the tables to MYO so it can propagate them to the target
for(MyoTableList::const_iterator it = __myo_table_list.begin();
it != __myo_table_list.end(); ++it) {
#ifdef TARGET_WINNT
@@ -419,6 +635,8 @@ static bool __offload_myoInit(void)
continue;
}
myo_wrapper.HostVarTablePropagate(entry, 1);
+ OFFLOAD_DEBUG_TRACE(2, "HostVarTablePropagate(%s, 1)\n",
+ entry->varName);
}
#else // TARGET_WINNT
myo_wrapper.HostVarTablePropagate(it->var_tab,
@@ -430,8 +648,6 @@ static bool __offload_myoInit(void)
__myo_tables = false;
}
}
-
- return myo_is_available;
}
static bool shared_table_entries(
@@ -485,13 +701,164 @@ extern "C" void __offload_myoRegisterTables(
__offload_myoLoadLibrary();
// register tables
- __offload_myo_shared_table_register(shared_table);
- __offload_myo_fptr_table_register(fptr_table);
- __offload_myo_shared_init_table_register(init_table);
+ __offload_myo_shared_table_process(shared_table);
+ __offload_myo_fptr_table_process(fptr_table);
+ __offload_myo_shared_init_table_process(init_table);
}
}
-void __offload_myoFini(void)
+// Collect the MYO tables of one module and, unless the module is an
+// auto-loaded shared library, process all tables collected so far.
+//
+// image          image passed to __offload_target_image_is_executable
+// init_table     init-routine table node of this module
+// shared_table   shared-variable table node of this module
+// shared_vtable  shared-vtable table node of this module
+// fptr_table     function table node of this module
+//
+// Returns true only when 'image' is the main executable.
+extern "C" bool __offload_myoProcessTables(
+    const void* image,
+    MYOInitTableList::Node *init_table,
+    MYOVarTableList::Node *shared_table,
+    MYOVarTableList::Node *shared_vtable,
+    MYOFuncTableList::Node *fptr_table
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // Collect the tables in this .dll/.so
+    __offload_myoRegisterTables1(
+        init_table, shared_table, shared_vtable, fptr_table);
+
+    // Main executable: MYO tables across dlls have been collected, so
+    // init MYO and process them now.
+    if (__offload_target_image_is_executable(image)) {
+        OFFLOAD_DEBUG_TRACE(2, "Main encountered\n");
+        OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n");
+        __offload_myoProcessDeferredTables();
+        // Return true to indicate that atexit needs to be called by
+        // ofldbegin
+        return true;
+    }
+
+    // Shared library.  __target_exe not being set means main has not
+    // started running, i.e. the library was auto-loaded: the tables have
+    // been collected and there is nothing else to do yet.
+    if (__target_exe == 0) {
+        OFFLOAD_DEBUG_TRACE(2,
+            "Auto-loaded shared library encountered\n");
+        OFFLOAD_DEBUG_TRACE(2, "Deferring initialization of MYO\n");
+        return false;
+    }
+
+    // Main is running: this is a dynamic load of a shared library, so
+    // finish processing the tables in this library.
+    OFFLOAD_DEBUG_TRACE(2,
+        "Dynamically loaded shared library encountered\n");
+    OFFLOAD_DEBUG_TRACE(2,
+        "MYO initialization not deferred\n");
+    __offload_myoProcessDeferredTables();
+    return false;
+}
+
+// Process contents of all Var tables.
+// While holding m_lock: passes each node's table entries to
+// __offload_myo_shared_table_process, then calls remove_table on each node
+// in a second pass.
+void MYOVarTableList::process()
+{
+ OFFLOAD_DEBUG_TRACE(2, "Process MYO Var tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ __offload_myo_shared_table_process(
+ (SharedTableEntry*)n->table.entries);
+ }
+ // NOTE(review): n->next is read after remove_table(n); this assumes
+ // remove_table leaves the removed node's next pointer usable - confirm.
+ for (Node *n = m_head; n != 0; n = n->next) {
+ remove_table(n);
+ }
+
+ m_lock.unlock();
+}
+
+// Process contents of all Vtable tables.
+// While holding m_lock: passes each node's table entries to
+// __offload_myo_shared_vtable_process, then calls remove_table on each node
+// in a second pass.
+void MYOVarTableList::process_vtable()
+{
+ OFFLOAD_DEBUG_TRACE(2, "Process MYO Vtable tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ __offload_myo_shared_vtable_process(
+ (SharedTableEntry*)n->table.entries);
+ }
+ // NOTE(review): n->next is read after remove_table(n); this assumes
+ // remove_table leaves the removed node's next pointer usable - confirm.
+ for (Node *n = m_head; n != 0; n = n->next) {
+ remove_table(n);
+ }
+
+ m_lock.unlock();
+}
+
+// Process contents of all Func tables.
+// While holding m_lock: passes each node's table entries to
+// __offload_myo_fptr_table_process, then calls remove_table on each node
+// in a second pass.
+void MYOFuncTableList::process()
+{
+ OFFLOAD_DEBUG_TRACE(2, "Process MYO Func tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ __offload_myo_fptr_table_process(
+ (FptrTableEntry*)n->table.entries);
+ }
+ // NOTE(review): n->next is read after remove_table(n); this assumes
+ // remove_table leaves the removed node's next pointer usable - confirm.
+ for (Node *n = m_head; n != 0; n = n->next) {
+ remove_table(n);
+ }
+
+ m_lock.unlock();
+}
+
+// Process contents of all Init tables.
+// While holding m_lock: passes each node's table entries to
+// __offload_myo_shared_init_table_process, then calls remove_table on each
+// node in a second pass.
+void MYOInitTableList::process()
+{
+ OFFLOAD_DEBUG_TRACE(2, "Process MYO Init tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ __offload_myo_shared_init_table_process(
+ (InitTableEntry*)n->table.entries);
+ }
+ // NOTE(review): n->next is read after remove_table(n); this assumes
+ // remove_table leaves the removed node's next pointer usable - confirm.
+ for (Node *n = m_head; n != 0; n = n->next) {
+ remove_table(n);
+ }
+
+ m_lock.unlock();
+}
+
+// Process every MYO table collected so far (variables, vtables, functions,
+// init routines).  When all four lists are empty the MYO library is not
+// loaded at all.
+static void __offload_myoProcessDeferredTables()
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
+
+    // Debug dumps of MYO tables
+    if (console_enabled >= 2) {
+        __offload_myo_var_tables.dump();
+        __offload_myo_vtable_tables.dump();
+        __offload_myo_func_tables.dump();
+        __offload_myo_init_tables.dump();
+    }
+
+    const bool all_empty = __offload_myo_var_tables.is_empty() &&
+                           __offload_myo_vtable_tables.is_empty() &&
+                           __offload_myo_func_tables.is_empty() &&
+                           __offload_myo_init_tables.is_empty();
+    if (all_empty) {
+        OFFLOAD_DEBUG_TRACE(3,
+            "MYO tables are empty; Will not initialize MYO\n");
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "MYO usage detected in program\n");
+
+    // Make sure myo library is loaded
+    __offload_myoLoadLibrary();
+    OFFLOAD_DEBUG_TRACE(3, "Initialized MYO\n");
+
+    // Order matters to callers of the individual tables: variables,
+    // vtables, functions, then init routines.
+    __offload_myo_var_tables.process();
+    __offload_myo_vtable_tables.process_vtable();
+    __offload_myo_func_tables.process();
+    __offload_myo_init_tables.process();
+    OFFLOAD_DEBUG_TRACE(3, "Finished processing MYO tables\n");
+}
+
+DLL_LOCAL void __offload_myoFini(void)
{
if (myo_is_available) {
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
@@ -516,7 +883,7 @@ void __offload_myoFini(void)
}
}
-static void __offload_myo_shared_table_register(
+static void __offload_myo_shared_table_process(
SharedTableEntry *entry
)
{
@@ -529,7 +896,8 @@ static void __offload_myo_shared_table_register(
for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) {
#ifdef TARGET_WINNT
if (entry->varName == 0) {
- OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n");
+ OFFLOAD_DEBUG_TRACE(4,
+ "skip registering a NULL MyoSharedTable entry\n");
continue;
}
#endif // TARGET_WINNT
@@ -550,29 +918,69 @@ static void __offload_myo_shared_table_register(
}
}
-static void __offload_myo_shared_init_table_register(InitTableEntry* entry)
+// Walk a shared-vtable table up to its end marker.  For every entry the
+// address stored in sharedAddr is called as a function taking the vtable
+// arena.  A non-empty table is then appended to __myo_table_list and
+// __myo_tables is set.
+static void __offload_myo_shared_vtable_process(
+    SharedTableEntry *entry
+)
+{
+    typedef void (*VtableCreateFn)(MyoArena);
+    int processed = 0;
+
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    // allocate shared memory for vtables
+    for (SharedTableEntry *cur = entry;
+         cur->varName != MYO_TABLE_END_MARKER(); ++cur) {
+#ifdef TARGET_WINNT
+        if (cur->varName == 0) {
+            OFFLOAD_DEBUG_TRACE(4,
+                "skip registering a NULL MyoSharedVTable entry\n");
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        OFFLOAD_DEBUG_TRACE(4,
+            "registering MyoSharedVTable entry for %s @%p\n",
+            cur->varName, cur);
+
+        // Invoke the function to create shared memory
+        VtableCreateFn create =
+            reinterpret_cast<VtableCreateFn>(cur->sharedAddr);
+        create(myo_wrapper.GetVtableArena());
+        ++processed;
+    }
+
+    // nothing to record for an empty table
+    if (processed <= 0) {
+        return;
+    }
+
+    mutex_locker_t locker(__myo_table_lock);
+    __myo_table_list.push_back(MyoTable(entry, processed));
+    __myo_tables = true;
+}
+
+void __offload_myo_shared_init_table_process(InitTableEntry* entry)
{
OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
#ifdef TARGET_WINNT
for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
if (entry->funcName == 0) {
- OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n");
+ OFFLOAD_DEBUG_TRACE(4,
+ "skip registering a NULL MyoSharedInit entry\n");
continue;
}
// Invoke the function to init the shared memory
- entry->func();
+ OFFLOAD_DEBUG_TRACE(4, "execute MyoSharedInit routine for %s\n",
+ entry->funcName);
+ entry->func(myo_wrapper.GetVtableArena());
}
#else // TARGET_WINNT
for (; entry->func != 0; entry++) {
// Invoke the function to init the shared memory
- entry->func();
+ entry->func(myo_wrapper.GetVtableArena());
}
#endif // TARGET_WINNT
}
-static void __offload_myo_fptr_table_register(
+static void __offload_myo_fptr_table_process(
FptrTableEntry *entry
)
{
@@ -584,7 +992,8 @@ static void __offload_myo_fptr_table_register(
for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
#ifdef TARGET_WINNT
if (entry->funcName == 0) {
- OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n");
+ OFFLOAD_DEBUG_TRACE(4,
+ "skip registering a NULL MyoFptrTable entry\n");
continue;
}
#endif // TARGET_WINNT
@@ -722,6 +1131,80 @@ extern "C" void _Offload_shared_aligned_free(void *ptr)
}
}
+// Create an arena through the MYO wrapper.  Does nothing beyond tracing
+// when __offload_myoLoadLibrary() reports failure.
+extern "C" void _Offload_shared_arena_create(
+    MyoOwnershipType ownership,
+    int consistency,
+    MyoArena* arena
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%d, %d, %p)\n",
+                        __func__, ownership, consistency, arena);
+
+    if (!__offload_myoLoadLibrary()) {
+        return;
+    }
+    myo_wrapper.ArenaCreate(ownership, consistency, arena);
+}
+
+// Allocate 'size' bytes aligned to 'align' from 'arena' through the MYO
+// wrapper.  When __offload_myoLoadLibrary() reports failure, falls back to
+// _mm_malloc with the alignment raised to at least sizeof(void*).
+extern "C" void* _Offload_shared_aligned_arena_malloc(
+    MyoArena arena,
+    size_t size,
+    size_t align
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)\n",
+                        __func__, arena, size, align);
+
+    if (!__offload_myoLoadLibrary()) {
+        const size_t min_align = sizeof(void*);
+        return _mm_malloc(size, align < min_align ? min_align : align);
+    }
+
+    void *shared = myo_wrapper.SharedAlignedArenaMalloc(arena, size, align);
+    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)->%p\n",
+                        __func__, arena, size, align, shared);
+    return shared;
+}
+
+// Release memory obtained from _Offload_shared_aligned_arena_malloc: via
+// the MYO wrapper when __offload_myoLoadLibrary() succeeds, otherwise via
+// _mm_free.
+extern "C" void _Offload_shared_aligned_arena_free(
+    MyoArena arena,
+    void *ptr
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        _mm_free(ptr);
+        return;
+    }
+    myo_wrapper.SharedAlignedArenaFree(arena, ptr);
+}
+
+// Acquire 'arena' through the MYO wrapper.  Does nothing beyond tracing
+// when __offload_myoLoadLibrary() reports failure.
+extern "C" void _Offload_shared_arena_acquire(
+    MyoArena arena
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
+
+    if (!__offload_myoLoadLibrary()) {
+        return;
+    }
+    myo_wrapper.ArenaAcquire(arena);
+}
+
+// Release 'arena' through the MYO wrapper.  Does nothing beyond tracing
+// when __offload_myoLoadLibrary() reports failure.
+extern "C" void _Offload_shared_arena_release(
+    MyoArena arena
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
+
+    if (!__offload_myoLoadLibrary()) {
+        return;
+    }
+    myo_wrapper.ArenaRelease(arena);
+}
+
extern "C" void __intel_cilk_for_32_offload(
int size,
void (*copy_constructor)(void*, void*),
diff --git a/liboffloadmic/runtime/offload_myo_host.h b/liboffloadmic/runtime/offload_myo_host.h
index 1116ee3601ea..5b9f160f1bee 100644
--- a/liboffloadmic/runtime/offload_myo_host.h
+++ b/liboffloadmic/runtime/offload_myo_host.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -34,67 +34,35 @@
#include <myotypes.h>
#include <myoimpl.h>
#include <myo.h>
-#include "offload.h"
-
-typedef MyoiSharedVarEntry SharedTableEntry;
-//typedef MyoiHostSharedFptrEntry FptrTableEntry;
-typedef struct {
- //! Function Name
- const char *funcName;
- //! Function Address
- void *funcAddr;
- //! Local Thunk Address
- void *localThunkAddr;
-#ifdef TARGET_WINNT
- // Dummy to pad up to 32 bytes
- void *dummy;
-#endif // TARGET_WINNT
-} FptrTableEntry;
-
-struct InitTableEntry {
-#ifdef TARGET_WINNT
- // Dummy to pad up to 16 bytes
- // Function Name
- const char *funcName;
-#endif // TARGET_WINNT
- void (*func)(void);
-};
-
-#ifdef TARGET_WINNT
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
-
-#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a"
-#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z"
-
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
-#else // TARGET_WINNT
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
-
-#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable."
-#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable."
-
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
-#endif // TARGET_WINNT
-
-#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
-#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
-
-#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write)
-#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
-
-#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
-#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+#include "offload.h"
+// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
+// is not defined which is the case when building the offload library
+#undef _Offload_shared_malloc
+#undef _Offload_shared_free
+#undef _Offload_shared_aligned_malloc
+#undef _Offload_shared_aligned_free
+#include "offload_table.h"
+
+// This function retained for compatibility with 15.0
extern "C" void __offload_myoRegisterTables(
InitTableEntry *init_table,
SharedTableEntry *shared_table,
FptrTableEntry *fptr_table
);
+// Process shared variable, shared vtable and function and init routine tables.
+// In .dlls/.sos these will be collected together.
+// In the main program, all collected tables will be processed.
+extern "C" bool __offload_myoProcessTables(
+ const void* image,
+ MYOInitTableList::Node *init_table,
+ MYOVarTableList::Node *shared_table,
+ MYOVarTableList::Node *shared_vtable,
+ MYOFuncTableList::Node *fptr_table
+);
+
extern void __offload_myoFini(void);
+extern bool __offload_myo_init_is_deferred(const void *image);
#endif // OFFLOAD_MYO_HOST_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_myo_target.cpp b/liboffloadmic/runtime/offload_myo_target.cpp
index bd5ad17adbc4..5160ca951ac3 100644
--- a/liboffloadmic/runtime/offload_myo_target.cpp
+++ b/liboffloadmic/runtime/offload_myo_target.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -44,7 +44,7 @@ static void CheckResult(const char *func, MyoError error) {
}
}
-static void __offload_myo_shared_table_register(SharedTableEntry *entry)
+static void __offload_myo_shared_table_process(SharedTableEntry *entry)
{
int entries = 0;
SharedTableEntry *t_start;
@@ -68,7 +68,32 @@ static void __offload_myo_shared_table_register(SharedTableEntry *entry)
}
}
-static void __offload_myo_fptr_table_register(
+// Trace every entry of a shared-vtable table (terminated by a null varName)
+// and, when the table is not empty, register it with
+// myoiMicVarTableRegister.
+static void __offload_myo_shared_vtable_process(SharedTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    int count = 0;
+    for (SharedTableEntry *cur = entry; cur->varName != 0; ++cur, ++count) {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
+                              "myo shared vtable entry name"
+                              " = \"%s\" addr = %p\n",
+                              cur->varName, cur->sharedAddr);
+    }
+
+    if (count <= 0) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
+                        count);
+    CheckResult("myoiMicVarTableRegister",
+                myoiMicVarTableRegister(entry, count));
+}
+
+static void __offload_myo_fptr_table_process(
FptrTableEntry *entry
)
{
@@ -94,9 +119,22 @@ static void __offload_myo_fptr_table_register(
}
}
+// Call every init routine in the table, in order, stopping at the first
+// entry whose func pointer is null.
+void __offload_myo_shared_init_table_process(InitTableEntry* entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->func != 0) {
+        // Invoke the function to init the shared memory
+        OFFLOAD_DEBUG_TRACE(3, "Invoked a shared init function @%p\n",
+                            (void *)(entry->func));
+        entry->func();
+        ++entry;
+    }
+}
+
extern "C" void __offload_myoAcquire(void)
{
OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
CheckResult("myoAcquire", myoAcquire());
}
@@ -162,8 +200,35 @@ extern "C" void __offload_myoRegisterTables(
return;
}
- __offload_myo_shared_table_register(shared_table);
- __offload_myo_fptr_table_register(fptr_table);
+ __offload_myo_shared_table_process(shared_table);
+ __offload_myo_fptr_table_process(fptr_table);
+}
+
+// Register a module's shared-variable, shared-vtable and function tables.
+// init_table is accepted but not used by this function.
+extern "C" void __offload_myoProcessTables(
+    InitTableEntry* init_table,
+    SharedTableEntry *shared_table,
+    SharedTableEntry *shared_vtable,
+    FptrTableEntry *fptr_table
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // one time registration of Intel(R) Cilk(TM) language entries
+    static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+    pthread_once(&once_control, __offload_myo_once_init);
+
+    // register module's tables
+    // check slot-1 of the function table because
+    // slot-0 is predefined with --vtable_initializer--
+    const bool has_entries = shared_table->varName != 0 ||
+                             shared_vtable->varName != 0 ||
+                             fptr_table[1].funcName != 0;
+    if (has_entries) {
+        __offload_myo_shared_table_process(shared_table);
+        __offload_myo_shared_vtable_process(shared_vtable);
+        __offload_myo_fptr_table_process(fptr_table);
+    }
}
extern "C" void* _Offload_shared_malloc(size_t size)
@@ -190,6 +255,46 @@ extern "C" void _Offload_shared_aligned_free(void *ptr)
myoSharedAlignedFree(ptr);
}
+// Allocate memory from the given MYO arena.
+// Traces the call and forwards arena, size and align unchanged to
+// myoArenaAlignedMalloc, returning whatever that call returns.
+extern "C" void* _Offload_shared_aligned_arena_malloc(
+    MyoArena arena,
+    size_t size,
+    size_t align
+)
+{
+    // size and align are size_t, which requires the 'z' length modifier;
+    // %lld expects a long long argument and does not match the type.
+    OFFLOAD_DEBUG_TRACE(
+        3, "%s(%u, %zu, %zu)\n", __func__, arena, size, align);
+
+    return myoArenaAlignedMalloc(arena, size, align);
+}
+
+// Release memory back to the given MYO arena.
+// Traces the call and forwards arena and ptr unchanged to
+// myoArenaAlignedFree; nothing is returned to the caller.
+extern "C" void _Offload_shared_aligned_arena_free(
+ MyoArena arena,
+ void *ptr
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);
+
+ myoArenaAlignedFree(arena, ptr);
+}
+
+// Acquire the given MYO arena.
+// Traces the call and forwards to myoArenaAcquire; the result of that call
+// is not checked here.
+extern "C" void _Offload_shared_arena_acquire(
+ MyoArena arena
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
+
+ myoArenaAcquire(arena);
+}
+
+// Release the given MYO arena.
+// Traces the call and forwards to myoArenaRelease; the result of that call
+// is not checked here.
+extern "C" void _Offload_shared_arena_release(
+ MyoArena arena
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
+
+ myoArenaRelease(arena);
+}
+
// temporary workaround for blocking behavior of myoiLibInit/Fini calls
extern "C" void __offload_myoLibInit()
{
diff --git a/liboffloadmic/runtime/offload_myo_target.h b/liboffloadmic/runtime/offload_myo_target.h
index 777a3da1acab..4383aae0b5e6 100644
--- a/liboffloadmic/runtime/offload_myo_target.h
+++ b/liboffloadmic/runtime/offload_myo_target.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -31,42 +31,38 @@
#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED
#define OFFLOAD_MYO_TARGET_H_INCLUDED
-#include <myotypes.h>
-#include <myoimpl.h>
-#include <myo.h>
-#include "offload.h"
-
-typedef MyoiSharedVarEntry SharedTableEntry;
-typedef MyoiTargetSharedFptrEntry FptrTableEntry;
-
-#ifdef TARGET_WINNT
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
-
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
-#else // TARGET_WINNT
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
-#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
-
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
-#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
-#endif // TARGET_WINNT
-
-#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
-#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
-
-#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
-#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+#include "offload.h"
+// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
+// is not defined which is the case when building the offload library
+#undef _Offload_shared_malloc
+#undef _Offload_shared_free
+#undef _Offload_shared_aligned_malloc
+#undef _Offload_shared_aligned_free
+#include "offload_table.h"
+
+// This function retained for compatibility with 15.0
extern "C" void __offload_myoRegisterTables(
SharedTableEntry *shared_table,
FptrTableEntry *fptr_table
);
+// Process shared variable, shared vtable and function and init routine tables.
+// On the target side the contents of the tables are registered with MYO.
+extern "C" void __offload_myoProcessTables(
+ InitTableEntry* init_table,
+ SharedTableEntry *shared_table,
+ SharedTableEntry *shared_vtable,
+ FptrTableEntry *fptr_table
+);
+
extern "C" void __offload_myoAcquire(void);
extern "C" void __offload_myoRelease(void);
+// Call the compiler-generated routines for initializing shared variables.
+// This can only be done after shared memory allocation has been done.
+extern void __offload_myo_shared_init_table_process(InitTableEntry* entry);
+
// temporary workaround for blocking behavior for myoiLibInit/Fini calls
extern "C" void __offload_myoLibInit();
extern "C" void __offload_myoLibFini();
diff --git a/liboffloadmic/runtime/offload_omp_host.cpp b/liboffloadmic/runtime/offload_omp_host.cpp
index ceba612ac3b4..14498470e3ab 100644
--- a/liboffloadmic/runtime/offload_omp_host.cpp
+++ b/liboffloadmic/runtime/offload_omp_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_omp_target.cpp b/liboffloadmic/runtime/offload_omp_target.cpp
index 2ccce7c76874..91baef5da87d 100644
--- a/liboffloadmic/runtime/offload_omp_target.cpp
+++ b/liboffloadmic/runtime/offload_omp_target.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -86,7 +86,7 @@ static int omp_get_int_from_host(
return setting;
}
-void omp_set_num_threads_lrb(
+DLL_LOCAL void omp_set_num_threads_lrb(
void *ofld
)
{
@@ -96,7 +96,7 @@ void omp_set_num_threads_lrb(
omp_set_num_threads(num_threads);
}
-void omp_get_max_threads_lrb(
+DLL_LOCAL void omp_get_max_threads_lrb(
void *ofld
)
{
@@ -106,7 +106,7 @@ void omp_get_max_threads_lrb(
omp_send_int_to_host(ofld, num_threads);
}
-void omp_get_num_procs_lrb(
+DLL_LOCAL void omp_get_num_procs_lrb(
void *ofld
)
{
@@ -116,7 +116,7 @@ void omp_get_num_procs_lrb(
omp_send_int_to_host(ofld, num_procs);
}
-void omp_set_dynamic_lrb(
+DLL_LOCAL void omp_set_dynamic_lrb(
void *ofld
)
{
@@ -126,7 +126,7 @@ void omp_set_dynamic_lrb(
omp_set_dynamic(dynamic);
}
-void omp_get_dynamic_lrb(
+DLL_LOCAL void omp_get_dynamic_lrb(
void *ofld
)
{
@@ -136,7 +136,7 @@ void omp_get_dynamic_lrb(
omp_send_int_to_host(ofld, dynamic);
}
-void omp_set_nested_lrb(
+DLL_LOCAL void omp_set_nested_lrb(
void *ofld
)
{
@@ -146,7 +146,7 @@ void omp_set_nested_lrb(
omp_set_nested(nested);
}
-void omp_get_nested_lrb(
+DLL_LOCAL void omp_get_nested_lrb(
void *ofld
)
{
@@ -156,7 +156,7 @@ void omp_get_nested_lrb(
omp_send_int_to_host(ofld, nested);
}
-void omp_set_schedule_lrb(
+DLL_LOCAL void omp_set_schedule_lrb(
void *ofld_
)
{
@@ -180,7 +180,7 @@ void omp_set_schedule_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_get_schedule_lrb(
+DLL_LOCAL void omp_get_schedule_lrb(
void *ofld_
)
{
@@ -206,7 +206,7 @@ void omp_get_schedule_lrb(
// lock API functions
-void omp_init_lock_lrb(
+DLL_LOCAL void omp_init_lock_lrb(
void *ofld_
)
{
@@ -224,7 +224,7 @@ void omp_init_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_destroy_lock_lrb(
+DLL_LOCAL void omp_destroy_lock_lrb(
void *ofld_
)
{
@@ -242,7 +242,7 @@ void omp_destroy_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_set_lock_lrb(
+DLL_LOCAL void omp_set_lock_lrb(
void *ofld_
)
{
@@ -260,7 +260,7 @@ void omp_set_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_unset_lock_lrb(
+DLL_LOCAL void omp_unset_lock_lrb(
void *ofld_
)
{
@@ -278,7 +278,7 @@ void omp_unset_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_test_lock_lrb(
+DLL_LOCAL void omp_test_lock_lrb(
void *ofld_
)
{
@@ -304,7 +304,7 @@ void omp_test_lock_lrb(
// nested lock API functions
-void omp_init_nest_lock_lrb(
+DLL_LOCAL void omp_init_nest_lock_lrb(
void *ofld_
)
{
@@ -322,7 +322,7 @@ void omp_init_nest_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_destroy_nest_lock_lrb(
+DLL_LOCAL void omp_destroy_nest_lock_lrb(
void *ofld_
)
{
@@ -340,7 +340,7 @@ void omp_destroy_nest_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_set_nest_lock_lrb(
+DLL_LOCAL void omp_set_nest_lock_lrb(
void *ofld_
)
{
@@ -358,7 +358,7 @@ void omp_set_nest_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_unset_nest_lock_lrb(
+DLL_LOCAL void omp_unset_nest_lock_lrb(
void *ofld_
)
{
@@ -376,7 +376,7 @@ void omp_unset_nest_lock_lrb(
OFFLOAD_TARGET_LEAVE(ofld);
}
-void omp_test_nest_lock_lrb(
+DLL_LOCAL void omp_test_nest_lock_lrb(
void *ofld_
)
{
diff --git a/liboffloadmic/runtime/offload_orsl.cpp b/liboffloadmic/runtime/offload_orsl.cpp
index aa3edc36e03f..9a7ac3b6f63f 100644
--- a/liboffloadmic/runtime/offload_orsl.cpp
+++ b/liboffloadmic/runtime/offload_orsl.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -36,7 +36,7 @@
namespace ORSL {
static bool is_enabled = false;
-static const ORSLTag my_tag = "Offload";
+static const ORSLTag my_tag = (const ORSLTag) "Offload";
void init()
{
diff --git a/liboffloadmic/runtime/offload_orsl.h b/liboffloadmic/runtime/offload_orsl.h
index 8bdbf1abb507..df3f5fc36ea0 100644
--- a/liboffloadmic/runtime/offload_orsl.h
+++ b/liboffloadmic/runtime/offload_orsl.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,17 +28,19 @@
*/
+#include "offload_util.h"
+
#ifndef OFFLOAD_ORSL_H_INCLUDED
#define OFFLOAD_ORSL_H_INCLUDED
// ORSL interface
namespace ORSL {
-extern void init();
+DLL_LOCAL extern void init();
-extern bool reserve(int device);
-extern bool try_reserve(int device);
-extern void release(int device);
+DLL_LOCAL extern bool reserve(int device);
+DLL_LOCAL extern bool try_reserve(int device);
+DLL_LOCAL extern void release(int device);
} // namespace ORSL
diff --git a/liboffloadmic/runtime/offload_table.cpp b/liboffloadmic/runtime/offload_table.cpp
index d73def16e702..f3c5100cacd9 100644
--- a/liboffloadmic/runtime/offload_table.cpp
+++ b/liboffloadmic/runtime/offload_table.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -321,6 +321,8 @@ extern "C" void __offload_unregister_tables(
VarList::Node *var_table
)
{
+ OFFLOAD_DEBUG_TRACE(2, "Unregistering offload function entry table %p\n",
+ entry_table);
__offload_entries.remove_table(entry_table);
OFFLOAD_DEBUG_TRACE(2, "Unregistering function table %p\n", func_table);
@@ -329,3 +331,219 @@ extern "C" void __offload_unregister_tables(
OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table);
__offload_vars.remove_table(var_table);
}
+
+#ifdef MYO_SUPPORT
+
+MYOVarTableList __offload_myo_var_tables;
+MYOVarTableList __offload_myo_vtable_tables;
+MYOFuncTableList __offload_myo_func_tables;
+MYOInitTableList __offload_myo_init_tables;
+
+// Debugging dump: trace the name and shared address of every entry in
+// every registered MYO variable table. The list lock is held for the
+// duration of the walk.
+void MYOVarTableList::dump(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "MYO Var tables:\n");
+
+    m_lock.lock();
+
+    Node *node = m_head;
+    while (node != 0) {
+        OFFLOAD_DEBUG_TRACE(2, " MYO Var table:\n");
+        for (const Table::Entry *entry = node->table.entries;
+             entry->varName != MYO_TABLE_END_MARKER(); ++entry) {
+#ifdef TARGET_WINNT
+            // entries with a null name are skipped on Windows targets
+            if (entry->varName == 0) {
+                continue;
+            }
+#endif // TARGET_WINNT
+            OFFLOAD_DEBUG_TRACE(2, " %s %p\n",
+                                entry->varName, entry->sharedAddr);
+        }
+        node = node->next;
+    }
+
+    m_lock.unlock();
+}
+
+// Report whether the registered MYO variable tables contain no entries.
+// Returns false as soon as one entry is found before a table's end marker
+// (null-named entries are ignored on Windows targets), true otherwise.
+bool MYOVarTableList::is_empty()
+{
+    OFFLOAD_DEBUG_TRACE(3, "Are MYO Var tables empty?\n");
+
+    bool empty = true;
+
+    m_lock.lock();
+
+    for (Node *node = m_head; empty && node != 0; node = node->next) {
+        for (const Table::Entry *entry = node->table.entries;
+             entry->varName != MYO_TABLE_END_MARKER(); ++entry) {
+#ifdef TARGET_WINNT
+            if (entry->varName == 0) {
+                continue;
+            }
+#endif // TARGET_WINNT
+            // first real entry settles the answer
+            empty = false;
+            break;
+        }
+    }
+
+    m_lock.unlock();
+
+    if (empty) {
+        OFFLOAD_DEBUG_TRACE(3, "Yes\n");
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE(3, "No\n");
+    }
+    return empty;
+}
+
+// Debugging dump: trace every entry of every registered MYO function table
+// while holding the list lock. The set of fields printed depends on whether
+// this is the host or the target build of the library.
+void MYOFuncTableList::dump(void)
+{
+ OFFLOAD_DEBUG_TRACE(2, "MYO Func tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ OFFLOAD_DEBUG_TRACE(2, " MYO Func table:\n");
+ for (const Table::Entry *e = n->table.entries;
+ e->funcName != MYO_TABLE_END_MARKER(); e++) {
+#ifdef TARGET_WINNT
+ // entries with a null name are skipped on Windows targets
+ if (e->funcName == 0) {
+ continue;
+ }
+#endif // TARGET_WINNT
+#if HOST_LIBRARY
+ // host entry: name, function address, local thunk address
+ OFFLOAD_DEBUG_TRACE(2, " %s %p %p\n",
+ e->funcName, e->funcAddr, e->localThunkAddr);
+#else // HOST_LIBRARY
+ // target entry additionally prints the wrapper function address
+ OFFLOAD_DEBUG_TRACE(2, " %s %p %p %p\n",
+ e->funcName, e->funcAddr, e->wrapFuncAddr, e->localThunkAddr);
+#endif // HOST_LIBRARY
+ }
+ }
+
+ m_lock.unlock();
+}
+
+// Report whether the registered MYO function tables hold no shared
+// functions. A table counts as non-empty only when it has more than one
+// entry -- presumably because slot 0 is a predefined entry; confirm.
+bool MYOFuncTableList::is_empty()
+{
+    OFFLOAD_DEBUG_TRACE(3, "Are MYO Func tables empty?\n");
+
+    bool empty = true;
+
+    m_lock.lock();
+
+    for (Node *node = m_head; empty && node != 0; node = node->next) {
+        // number of entries seen so far in this table
+        int seen = 0;
+        for (const Table::Entry *entry = node->table.entries;
+             entry->funcName != MYO_TABLE_END_MARKER(); ++entry) {
+#ifdef TARGET_WINNT
+            if (entry->funcName == 0) {
+                continue;
+            }
+#endif // TARGET_WINNT
+            ++seen;
+            if (seen > 1) {
+                empty = false;
+                break;
+            }
+        }
+    }
+
+    m_lock.unlock();
+
+    if (empty) {
+        OFFLOAD_DEBUG_TRACE(3, "Yes\n");
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE(3, "No\n");
+    }
+    return empty;
+}
+
+// Debugging dump: trace every entry of every registered MYO init routine
+// table while holding the list lock.
+// The loop header is selected by the preprocessor: Windows targets walk
+// until funcName equals the end marker and skip null-named entries, other
+// targets walk until a null func pointer is reached.
+void MYOInitTableList::dump(void)
+{
+ OFFLOAD_DEBUG_TRACE(2, "MYO Init tables:\n");
+
+ m_lock.lock();
+
+ for (Node *n = m_head; n != 0; n = n->next) {
+ OFFLOAD_DEBUG_TRACE(2, " MYO Init table:\n");
+ for (const Table::Entry *e = n->table.entries;
+#ifdef TARGET_WINNT
+ e->funcName != MYO_TABLE_END_MARKER(); e++) {
+ if (e->funcName == 0) {
+ continue;
+ }
+ OFFLOAD_DEBUG_TRACE(2, " %s %p\n", e->funcName, e->func);
+#else // TARGET_WINNT
+ e->func != 0; e++) {
+ OFFLOAD_DEBUG_TRACE(2, " %p\n", e->func);
+#endif // TARGET_WINNT
+ }
+ }
+
+ m_lock.unlock();
+}
+
+// check if any init routines
+// Returns false as soon as one init routine entry is found in any
+// registered table, true when every table is empty. The list lock is held
+// while the tables are examined and released before tracing the answer.
+bool MYOInitTableList::is_empty()
+{
+    OFFLOAD_DEBUG_TRACE(3, "Are MYO Init tables empty?\n");
+
+    m_lock.lock();
+
+    for (Node *n = m_head; n != 0; n = n->next) {
+        for (const Table::Entry *e = n->table.entries;
+#ifdef TARGET_WINNT
+             e->funcName != MYO_TABLE_END_MARKER(); e++) {
+            // entries with a null name are skipped on Windows targets
+            if (e->funcName == 0) {
+                continue;
+            }
+#else // TARGET_WINNT
+             e->func != 0; e++) {
+#endif // TARGET_WINNT
+            // Reaching this point means a real entry exists. The early
+            // return is shared by both variants of the loop header so that
+            // non-Windows targets also report a non-empty table.
+            m_lock.unlock();
+            OFFLOAD_DEBUG_TRACE(3, "No\n");
+            return false;
+        }
+    }
+
+    m_lock.unlock();
+    OFFLOAD_DEBUG_TRACE(3, "Yes\n");
+    return true;
+}
+
+// Add one module's four MYO table nodes to the corresponding global lists
+// (__offload_myo_var_tables, __offload_myo_vtable_tables,
+// __offload_myo_func_tables, __offload_myo_init_tables).
+// Each addition is preceded by a level-2 debug trace of the node address.
+// The tables are only recorded here; their contents are not examined.
+extern "C" void __offload_myoRegisterTables1(
+ MYOInitTableList::Node *init_table,
+ MYOVarTableList::Node *shared_table,
+ MYOVarTableList::Node *shared_vtable,
+ MYOFuncTableList::Node *fptr_table
+)
+{
+ OFFLOAD_DEBUG_TRACE(2, "Registering MYO shared var table %p\n",
+ shared_table);
+ __offload_myo_var_tables.add_table(shared_table);
+
+ OFFLOAD_DEBUG_TRACE(2, "Registering MYO shared vtable table %p\n",
+ shared_vtable);
+ __offload_myo_vtable_tables.add_table(shared_vtable);
+
+ OFFLOAD_DEBUG_TRACE(2, "Registering MYO function table %p\n", fptr_table);
+ __offload_myo_func_tables.add_table(fptr_table);
+
+ OFFLOAD_DEBUG_TRACE(2, "Registering MYO init table %p\n", init_table);
+ __offload_myo_init_tables.add_table(init_table);
+}
+
+// Remove one module's four MYO table nodes from the corresponding global
+// lists, in the same order in which __offload_myoRegisterTables1 adds them.
+// Each removal is preceded by a level-2 debug trace of the node address.
+// NOTE(review): thread-safety of the removal depends on
+// TableList::remove_table -- confirm whether it still takes the list lock.
+extern "C" void __offload_myoRemoveTables(
+ MYOInitTableList::Node *init_table,
+ MYOVarTableList::Node *shared_table,
+ MYOVarTableList::Node *shared_vtable,
+ MYOFuncTableList::Node *fptr_table
+)
+{
+ OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+ OFFLOAD_DEBUG_TRACE(2, "Removing MYO shared var table %p\n",
+ shared_table);
+ __offload_myo_var_tables.remove_table(shared_table);
+
+ OFFLOAD_DEBUG_TRACE(2, "Removing MYO shared vtable table %p\n",
+ shared_vtable);
+ __offload_myo_vtable_tables.remove_table(shared_vtable);
+
+ OFFLOAD_DEBUG_TRACE(2, "Removing MYO function table %p\n", fptr_table);
+ __offload_myo_func_tables.remove_table(fptr_table);
+
+ OFFLOAD_DEBUG_TRACE(2, "Removing MYO init table %p\n", init_table);
+ __offload_myo_init_tables.remove_table(init_table);
+}
+
+#endif // MYO_SUPPORT
diff --git a/liboffloadmic/runtime/offload_table.h b/liboffloadmic/runtime/offload_table.h
index cc4caad47a15..5602f2bfc42f 100644
--- a/liboffloadmic/runtime/offload_table.h
+++ b/liboffloadmic/runtime/offload_table.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -35,7 +35,6 @@
#ifndef OFFLOAD_TABLE_H_INCLUDED
#define OFFLOAD_TABLE_H_INCLUDED
-#include <iterator>
#include "offload_util.h"
// Template representing double linked list of tables
@@ -56,7 +55,6 @@ public:
void add_table(Node *node) {
m_lock.lock();
-
if (m_head != 0) {
node->next = m_head;
m_head->prev = node;
@@ -67,8 +65,6 @@ public:
}
void remove_table(Node *node) {
- m_lock.lock();
-
if (node->next != 0) {
node->next->prev = node->prev;
}
@@ -78,8 +74,6 @@ public:
if (m_head == node) {
m_head = node->next;
}
-
- m_lock.unlock();
}
protected:
@@ -109,7 +103,7 @@ struct FuncTable {
};
// Function table
-class FuncList : public TableList<FuncTable> {
+class DLL_LOCAL FuncList : public TableList<FuncTable> {
public:
explicit FuncList(Node *node = 0) : TableList<Table>(node),
m_max_name_len(-1)
@@ -172,7 +166,7 @@ struct VarTable {
};
// List of var tables
-class VarList : public TableList<VarTable> {
+class DLL_LOCAL VarList : public TableList<VarTable> {
public:
VarList() : TableList<Table>()
{}
@@ -181,69 +175,9 @@ public:
void dump();
public:
- // var table list iterator
- class Iterator : public std::iterator<std::input_iterator_tag,
- Table::Entry> {
- public:
- Iterator() : m_node(0), m_entry(0) {}
-
- explicit Iterator(Node *node) {
- new_node(node);
- }
-
- Iterator& operator++() {
- if (m_entry != 0) {
- m_entry++;
- while (m_entry->name == 0) {
- m_entry++;
- }
- if (m_entry->name == reinterpret_cast<const char*>(-1)) {
- new_node(m_node->next);
- }
- }
- return *this;
- }
-
- bool operator==(const Iterator &other) const {
- return m_entry == other.m_entry;
- }
-
- bool operator!=(const Iterator &other) const {
- return m_entry != other.m_entry;
- }
-
- const Table::Entry* operator*() const {
- return m_entry;
- }
-
- private:
- void new_node(Node *node) {
- m_node = node;
- m_entry = 0;
- while (m_node != 0) {
- m_entry = m_node->table.entries;
- while (m_entry->name == 0) {
- m_entry++;
- }
- if (m_entry->name != reinterpret_cast<const char*>(-1)) {
- break;
- }
- m_node = m_node->next;
- m_entry = 0;
- }
- }
-
- private:
- Node *m_node;
- const Table::Entry *m_entry;
- };
-
- Iterator begin() const {
- return Iterator(m_head);
- }
- Iterator end() const {
- return Iterator();
+ Node * get_head() {
+ return m_head;
}
public:
@@ -265,9 +199,9 @@ public:
static void table_patch_names(void *buf, int64_t nelems);
};
-extern FuncList __offload_entries;
-extern FuncList __offload_funcs;
-extern VarList __offload_vars;
+DLL_LOCAL extern FuncList __offload_entries;
+DLL_LOCAL extern FuncList __offload_funcs;
+DLL_LOCAL extern VarList __offload_vars;
// Section names where the lookup tables are stored
#ifdef TARGET_WINNT
@@ -318,4 +252,206 @@ extern "C" void __offload_unregister_tables(
FuncList::Node *func_table,
VarList::Node *var_table
);
+
+
+#ifdef MYO_SUPPORT
+
+#include <myotypes.h>
+#include <myoimpl.h>
+#include <myo.h>
+
+#ifdef TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
+#else // TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
+#endif // TARGET_WINNT
+
+// Host and Target-side MYO shared variable table entry layout
+typedef MyoiSharedVarEntry SharedTableEntry;
+
+#if HOST_LIBRARY
+
+// Host-side MYO function table entry layout
+// (used only when HOST_LIBRARY is set; the target side aliases
+// MyoiTargetSharedFptrEntry instead)
+typedef struct {
+ //! Function Name
+ const char *funcName;
+ //! Function Address
+ void *funcAddr;
+ //! Local Thunk Address
+ void *localThunkAddr;
+#ifdef TARGET_WINNT
+ // Dummy to pad up to 32 bytes
+ // (a fourth pointer-sized member, present only on Windows targets)
+ void *dummy;
+#endif // TARGET_WINNT
+} FptrTableEntry;
+
+// Host-side MYO init routine table entry layout
+// (used only when HOST_LIBRARY is set; the target-side entry holds just a
+// void(*)(void) routine pointer)
+typedef struct {
+#ifdef TARGET_WINNT
+ // Function name, present only on Windows targets; per the original
+ // note it also pads the entry up to 16 bytes
+ const char *funcName;
+#endif // TARGET_WINNT
+ // Init routine; it is called with a MyoArena argument
+ void (*func)(MyoArena);
+} InitTableEntry;
+
+#else // HOST_LIBRARY
+
+// Target-side MYO function table entry layout
+typedef MyoiTargetSharedFptrEntry FptrTableEntry;
+
+// Target-side MYO init routine table entry layout
+struct InitTableEntry {
+ void (*func)(void);
+};
+
+#endif // HOST_LIBRARY
+
+#ifdef TARGET_WINNT
+
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable$a"
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable$z"
+
+#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_START ".MyoSharedVTable$a"
+#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_END ".MyoSharedVTable$z"
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable$a"
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable$z"
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable$a"
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable$z"
+
+#else // TARGET_WINNT
+
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START ".MyoSharedTable."
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END ".MyoSharedTable."
+
+#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_START ".MyoSharedVTable."
+#define OFFLOAD_MYO_SHARED_VTABLE_SECTION_END ".MyoSharedVTable."
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START ".MyoSharedInitTable."
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END ".MyoSharedInitTable."
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START ".MyoFptrTable."
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END ".MyoFptrTable."
+
+#endif // TARGET_WINNT
+
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_SHARED_VTABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_VTABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+
+// List of MYO shared variable tables
+// MYOVarTable is the per-module table descriptor: a pointer to an array of
+// SharedTableEntry records.
+struct MYOVarTable {
+ typedef SharedTableEntry Entry;
+ const Entry *entries;
+};
+
+// List of MYOVarTable nodes built on the TableList template.
+// Two instances are declared in this header: one for ordinary shared
+// variables and one for shared vtables.
+class MYOVarTableList : public TableList<MYOVarTable> {
+public:
+ MYOVarTableList() : TableList<Table>()
+ {}
+
+ // add table to the list
+ // (forwards to the base class implementation without extra work)
+ void add_table(Node *node) {
+ // add table
+ TableList<Table>::add_table(node);
+ }
+
+ // debug dump
+ void dump(void);
+
+ // check if any shared variables
+ bool is_empty();
+
+ // process the table contents for ordinary variables
+ void process();
+
+ // process the table contents for vtable objects
+ void process_vtable();
+};
+
+// List of MYO shared function tables
+// MYOFuncTable is the per-module table descriptor: a pointer to an array of
+// FptrTableEntry records (whose layout differs between host and target).
+struct MYOFuncTable {
+ typedef FptrTableEntry Entry;
+ const Entry *entries;
+};
+
+// List of MYOFuncTable nodes built on the TableList template.
+class MYOFuncTableList : public TableList<MYOFuncTable> {
+public:
+ MYOFuncTableList() : TableList<Table>()
+ {}
+
+ // add table to the list
+ // (forwards to the base class implementation without extra work)
+ void add_table(Node *node) {
+ // add table
+ TableList<Table>::add_table(node);
+ }
+
+ // debug dump
+ void dump(void);
+
+ // check if any shared functions
+ bool is_empty();
+
+ // process the table contents
+ void process();
+};
+
+// List of MYO shared variable initialization routine tables
+// MYOInitTable is the per-module table descriptor: a pointer to an array of
+// InitTableEntry records (whose layout differs between host and target).
+struct MYOInitTable {
+ typedef InitTableEntry Entry;
+ const Entry *entries;
+};
+
+// List of MYOInitTable nodes built on the TableList template.
+class MYOInitTableList : public TableList<MYOInitTable> {
+public:
+ MYOInitTableList() : TableList<Table>()
+ {}
+
+ // add table to the list
+ // (forwards to the base class implementation without extra work)
+ void add_table(Node *node) {
+ // add table
+ TableList<Table>::add_table(node);
+ }
+
+ // debug dump
+ void dump(void);
+
+ // check if any init routines
+ bool is_empty();
+
+ // process the table contents
+ void process();
+};
+
+extern MYOVarTableList __offload_myo_var_tables;
+extern MYOVarTableList __offload_myo_vtable_tables;
+extern MYOFuncTableList __offload_myo_func_tables;
+extern MYOInitTableList __offload_myo_init_tables;
+
+extern "C" void __offload_myoRegisterTables1(
+ MYOInitTableList::Node *init_table,
+ MYOVarTableList::Node *shared_table,
+ MYOVarTableList::Node *shared_vtable,
+ MYOFuncTableList::Node *fptr_table
+);
+
+extern "C" void __offload_myoRemoveTables(
+ MYOInitTableList::Node *init_table,
+ MYOVarTableList::Node *shared_table,
+ MYOVarTableList::Node *shared_vtable,
+ MYOFuncTableList::Node *fptr_table
+);
+
+#endif // MYO_SUPPORT
+
#endif // OFFLOAD_TABLE_H_INCLUDED
diff --git a/liboffloadmic/runtime/offload_target.cpp b/liboffloadmic/runtime/offload_target.cpp
index 2e5f91e8c0bf..e3c13746cefe 100644
--- a/liboffloadmic/runtime/offload_target.cpp
+++ b/liboffloadmic/runtime/offload_target.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -114,6 +114,8 @@ static void BufReleaseRef(void * buf)
if (info) {
--info->count;
if (info->count == 0 && info->is_added) {
+ OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
+ ((RefInfo *) ref_data[buf])->count);
BufferReleaseRef(buf);
info->is_added = 0;
}
@@ -360,7 +362,6 @@ void OffloadDescriptor::scatter_copyin_data()
if (m_vars[i].flags.alloc_disp) {
int64_t offset = 0;
m_in.receive_data(&offset, sizeof(offset));
- m_vars[i].offset = -offset;
}
if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
VAR_TYPE_IS_DV_DATA(type)) {
@@ -369,7 +370,6 @@ void OffloadDescriptor::scatter_copyin_data()
*reinterpret_cast<ArrDesc**>(ptr_addr);
ptr_addr = reinterpret_cast<void**>(&dvp->Base);
}
-
// Set pointer values
switch (type) {
case c_data_ptr_array:
@@ -380,6 +380,9 @@ void OffloadDescriptor::scatter_copyin_data()
*(reinterpret_cast<char**>(m_vars[i].ptr)) :
reinterpret_cast<char*>(m_vars[i].into);
+ if (m_vars[i].flags.is_pointer) {
+ dst_arr_ptr = *((char**)dst_arr_ptr);
+ }
for (; j < max_el; j++) {
if (src_is_for_mic) {
m_vars[j].ptr =
@@ -402,8 +405,8 @@ void OffloadDescriptor::scatter_copyin_data()
case c_data_ptr:
case c_cean_var_ptr:
case c_dv_ptr:
- if (m_vars[i].alloc_if) {
- void *buf;
+ if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
+ void *buf = NULL;
if (m_vars[i].flags.sink_addr) {
m_in.receive_data(&buf, sizeof(buf));
}
@@ -417,9 +420,12 @@ void OffloadDescriptor::scatter_copyin_data()
// increment buffer reference
OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
BufferAddRef(buf);
+ OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
}
add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
+ OFFLOAD_TRACE(1, " AddRef count = %d\n",
+ ((RefInfo *) ref_data[buf])->count);
}
ptr = static_cast<char*>(buf) +
m_vars[i].mic_offset +
@@ -597,6 +603,7 @@ void OffloadDescriptor::gather_copyout_data()
case c_dv_ptr:
if (m_vars[i].free_if &&
src_is_for_mic &&
+ !m_vars[i].flags.preallocated &&
!m_vars[i].flags.is_static) {
void *buf = *static_cast<char**>(m_vars[i].ptr) -
m_vars[i].mic_offset -
@@ -610,6 +617,9 @@ void OffloadDescriptor::gather_copyout_data()
BufReleaseRef(buf);
OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
}
+ if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
+ m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
+ }
break;
case c_func_ptr:
diff --git a/liboffloadmic/runtime/offload_target.h b/liboffloadmic/runtime/offload_target.h
index f3a42f982c85..5638796f2d1d 100644
--- a/liboffloadmic/runtime/offload_target.h
+++ b/liboffloadmic/runtime/offload_target.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -99,16 +99,16 @@ private:
};
// one time target initialization in main
-extern void __offload_target_init(void);
+DLL_LOCAL extern void __offload_target_init(void);
// logical device index
-extern int mic_index;
+DLL_LOCAL extern int mic_index;
// total number of available logical devices
-extern int mic_engines_total;
+DLL_LOCAL extern int mic_engines_total;
// device frequency (from COI)
-extern uint64_t mic_frequency;
+DLL_LOCAL extern uint64_t mic_frequency;
struct RefInfo {
RefInfo(bool is_add, long amount):is_added(is_add),count(amount)
diff --git a/liboffloadmic/runtime/offload_target_main.cpp b/liboffloadmic/runtime/offload_target_main.cpp
index 90aca8f0812e..b95271c10d5b 100644
--- a/liboffloadmic/runtime/offload_target_main.cpp
+++ b/liboffloadmic/runtime/offload_target_main.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_timer.h b/liboffloadmic/runtime/offload_timer.h
index 847f9d15edad..8da1391c24c2 100644
--- a/liboffloadmic/runtime/offload_timer.h
+++ b/liboffloadmic/runtime/offload_timer.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -36,7 +36,7 @@
#include <stdint.h>
#include "liboffload_error_codes.h"
-extern int timer_enabled;
+DLL_LOCAL extern int timer_enabled;
#ifdef TIMING_SUPPORT
@@ -73,8 +73,8 @@ struct OffloadHostTimerData {
#if HOST_LIBRARY
-extern int offload_report_level;
-extern int offload_report_enabled;
+DLL_LOCAL extern int offload_report_level;
+DLL_LOCAL extern int offload_report_enabled;
#define OFFLOAD_REPORT_1 1
#define OFFLOAD_REPORT_2 2
#define OFFLOAD_REPORT_3 3
@@ -121,18 +121,18 @@ extern int offload_report_enabled;
offload_timer_fill_host_mic_num(timer_data, data); \
}
-extern void offload_timer_start(OffloadHostTimerData *,
+extern DLL_LOCAL void offload_timer_start(OffloadHostTimerData *,
OffloadHostPhase t_node);
-extern void offload_timer_stop(OffloadHostTimerData *,
+extern DLL_LOCAL void offload_timer_stop(OffloadHostTimerData *,
OffloadHostPhase t_node);
-extern OffloadHostTimerData * offload_timer_init(const char *file, int line);
-extern void offload_timer_fill_target_data(OffloadHostTimerData *,
+extern DLL_LOCAL OffloadHostTimerData * offload_timer_init(const char *file, int line);
+extern DLL_LOCAL void offload_timer_fill_target_data(OffloadHostTimerData *,
void *data);
-extern void offload_timer_fill_host_sdata(OffloadHostTimerData *,
+extern DLL_LOCAL void offload_timer_fill_host_sdata(OffloadHostTimerData *,
uint64_t sent_bytes);
-extern void offload_timer_fill_host_rdata(OffloadHostTimerData *,
+extern DLL_LOCAL void offload_timer_fill_host_rdata(OffloadHostTimerData *,
uint64_t sent_bytes);
-extern void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
+extern DLL_LOCAL void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
int card_number);
// Utility structure for starting/stopping timer
@@ -172,10 +172,10 @@ private:
#define OFFLOAD_TIMER_TARGET_DATA(data) \
if (timer_enabled) offload_timer_fill_target_data(data);
-extern void offload_timer_start(OffloadTargetPhase t_node);
-extern void offload_timer_stop(OffloadTargetPhase t_node);
-extern void offload_timer_init(void);
-extern void offload_timer_fill_target_data(void *data);
+extern DLL_LOCAL void offload_timer_start(OffloadTargetPhase t_node);
+extern DLL_LOCAL void offload_timer_stop(OffloadTargetPhase t_node);
+extern DLL_LOCAL void offload_timer_init(void);
+extern DLL_LOCAL void offload_timer_fill_target_data(void *data);
#endif // HOST_LIBRARY
diff --git a/liboffloadmic/runtime/offload_timer_host.cpp b/liboffloadmic/runtime/offload_timer_host.cpp
index 719af887abc6..2152e1e8f209 100644
--- a/liboffloadmic/runtime/offload_timer_host.cpp
+++ b/liboffloadmic/runtime/offload_timer_host.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_timer_target.cpp b/liboffloadmic/runtime/offload_timer_target.cpp
index 8dc4bbcc81dd..9e2b6d1f156e 100644
--- a/liboffloadmic/runtime/offload_timer_target.cpp
+++ b/liboffloadmic/runtime/offload_timer_target.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_trace.cpp b/liboffloadmic/runtime/offload_trace.cpp
index 4ba678cee37a..2609360d5f3d 100644
--- a/liboffloadmic/runtime/offload_trace.cpp
+++ b/liboffloadmic/runtime/offload_trace.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -73,7 +73,7 @@ static const char * offload_stage(std::stringstream &ss,
return 0;
}
-static const char * offload_signal(std::stringstream &ss,
+static const char * offload_message_2str(std::stringstream &ss,
int offload_number,
const char *tag,
const char *text)
@@ -216,27 +216,57 @@ void offload_stage_print(int stage, int offload_number, ...)
uint64_t *signal;
str1 = report_get_message_str(c_report_state_signal);
str2 = report_get_message_str(c_report_signal);
- offload_signal(ss, offload_number, str1, str2);
- signal = va_arg(va_args, uint64_t*);
- if (signal)
- ss << " 0x" << std::hex << *signal;
+ offload_message_2str(ss, offload_number, str1, str2);
+ signal = va_arg(va_args, uint64_t*);
+ if (signal)
+ ss << " 0x" << std::hex << *signal;
else
- ss << " none";
+ ss << " none";
+ }
+ break;
+ case c_offload_stream:
+ {
+ int64_t stream;
+ str1 = report_get_message_str(c_report_state_stream);
+ str2 = report_get_message_str(c_report_stream);
+ offload_message_2str(ss, offload_number, str1, str2);
+ stream = va_arg(va_args, int64_t);
+ if (stream)
+ ss << " 0x" << std::hex << stream;
+ else
+ ss << " none";
}
break;
case c_offload_wait:
{
int count;
+ OffloadWaitKind kind;
uint64_t **signal;
- str1 = report_get_message_str(c_report_state_signal);
+ kind = (enum OffloadWaitKind) va_arg(va_args, int);
+ // kind == c_offload_wait_signal for signal;
+ // other kinds are for stream
+ if (kind == c_offload_wait_signal) {
+ str1 = report_get_message_str(c_report_state_signal);
+ }
+ else {
+ str1 = report_get_message_str(c_report_state_stream);
+ }
str2 = report_get_message_str(c_report_wait);
- offload_signal(ss, offload_number, str1, str2);
+ offload_message_2str(ss, offload_number, str1, str2);
count = va_arg(va_args, int);
signal = va_arg(va_args, uint64_t**);
if (count) {
- while (count) {
- ss << " " << std::hex << signal[count-1];
- count--;
+ if (kind == c_offload_wait_signal) {
+ while (count) {
+ ss << " " << std::hex << signal[count-1];
+ count--;
+ }
+ }
+ else if (kind == c_offload_wait_stream) {
+ ss << signal;
+ }
+ else {
+ ss << " all";
}
}
else
@@ -304,6 +334,7 @@ void offload_stage_print(int stage, int offload_number, ...)
str1 = report_get_message_str(c_report_state);
str2 = report_get_message_str(c_report_myosharedalignedfree);
offload_stage(ss, offload_number, str1, str2, false);
+ ss << " " << va_arg(va_args, size_t);
break;
case c_offload_myoacquire:
str1 = report_get_message_str(c_report_state);
@@ -315,6 +346,55 @@ void offload_stage_print(int stage, int offload_number, ...)
str2 = report_get_message_str(c_report_myorelease);
offload_stage(ss, offload_number, str1, str2, false);
break;
+ case c_offload_myosupportsfeature:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myosupportsfeature);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, int);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, int);
+ break;
+ case c_offload_myosharedarenacreate:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myosharedarenacreate);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, char*);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, unsigned int);
+ break;
+ case c_offload_myosharedalignedarenamalloc:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myosharedalignedarenamalloc);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, char*);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, size_t);
+ ss << " " << va_arg(va_args, size_t);
+ break;
+ case c_offload_myosharedalignedarenafree:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myosharedalignedarenafree);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, char*);
+ ss << " " << va_arg(va_args, int);
+ ss << " " << va_arg(va_args, size_t);
+ break;
+ case c_offload_myoarenaacquire:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myoarenaacquire);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, char*);
+ ss << " " << va_arg(va_args, int);
+ break;
+ case c_offload_myoarenarelease:
+ str1 = report_get_message_str(c_report_state);
+ str2 = report_get_message_str(c_report_myoarenarelease);
+ offload_stage(ss, offload_number, str1, str2, false);
+ va_arg(va_args, char*);
+ ss << " " << va_arg(va_args, int);
+ break;
default:
LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
abort();
diff --git a/liboffloadmic/runtime/offload_trace.h b/liboffloadmic/runtime/offload_trace.h
index 02a0c8794dbc..1c2a6f4c050e 100644
--- a/liboffloadmic/runtime/offload_trace.h
+++ b/liboffloadmic/runtime/offload_trace.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -29,8 +29,9 @@
// The parts of the offload library common to host and target
+#include "offload_util.h"
-void offload_stage_print(int stage, int offload_number, ...);
+DLL_LOCAL void offload_stage_print(int stage, int offload_number, ...);
enum OffloadTraceStage {
// Total time spent on the target
@@ -68,5 +69,18 @@ enum OffloadTraceStage {
c_offload_myosharedalignedfree,
c_offload_myoacquire,
c_offload_myorelease,
- c_offload_myofini
+ c_offload_myofini,
+ c_offload_myosupportsfeature,
+ c_offload_myosharedarenacreate,
+ c_offload_myosharedalignedarenamalloc,
+ c_offload_myosharedalignedarenafree,
+ c_offload_myoarenaacquire,
+ c_offload_myoarenarelease,
+ c_offload_stream
+};
+
+enum OffloadWaitKind { // read as the first variadic argument of a c_offload_wait trace entry
+ c_offload_wait_signal = 0, // signal wait: the trace prints each entry of the signal array
+ c_offload_wait_stream, // stream wait: the trace prints the single pointer argument
+ c_offload_wait_all_streams // not tested by name in the trace code; any other kind prints " all"
};
diff --git a/liboffloadmic/runtime/offload_util.cpp b/liboffloadmic/runtime/offload_util.cpp
index ae6a75916d43..5217e91c2054 100644
--- a/liboffloadmic/runtime/offload_util.cpp
+++ b/liboffloadmic/runtime/offload_util.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/offload_util.h b/liboffloadmic/runtime/offload_util.h
index 2cffd82f70a4..894355aca7a8 100644
--- a/liboffloadmic/runtime/offload_util.h
+++ b/liboffloadmic/runtime/offload_util.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -31,14 +31,23 @@
#ifndef OFFLOAD_UTIL_H_INCLUDED
#define OFFLOAD_UTIL_H_INCLUDED
-#include <stdio.h>
#include <stdlib.h>
-#include <stdint.h>
#ifdef TARGET_WINNT
+ #define DLL_LOCAL
+#else
+ #define DLL_LOCAL __attribute__((visibility("hidden")))
+#endif
+
+#ifdef TARGET_WINNT
+// Don't use <stdint.h>: compiling with VS2010 makes ofldbegin.obj
+// incompatible with STL libraries from versions older than VS2010.
+typedef unsigned long long int uint64_t;
+typedef signed long long int int64_t;
#include <windows.h>
#include <process.h>
#else // TARGET_WINNT
+#include <stdint.h>
#include <dlfcn.h>
#include <pthread.h>
#endif // TARGET_WINNT
@@ -143,7 +152,7 @@ int DL_addr(const void *addr, Dl_info *info);
#define DL_addr(addr, info) dladdr((addr), (info))
#endif // TARGET_WINNT
-extern void* DL_sym(void *handle, const char *name, const char *version);
+DLL_LOCAL extern void* DL_sym(void *handle, const char *name, const char *version);
// One-time initialization API
#ifdef TARGET_WINNT
@@ -159,13 +168,13 @@ typedef pthread_once_t OffloadOnceControl;
#endif // TARGET_WINNT
// Parses size specification string.
-extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
+DLL_LOCAL extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
// Parses string with integer value
-extern bool __offload_parse_int_string(const char *str, int64_t &value);
+DLL_LOCAL extern bool __offload_parse_int_string(const char *str, int64_t &value);
// get value by its base, offset and size
-int64_t get_el_value(
+DLL_LOCAL int64_t get_el_value(
char *base,
int64_t offset,
int64_t size
diff --git a/liboffloadmic/runtime/ofldbegin.cpp b/liboffloadmic/runtime/ofldbegin.cpp
index 6f4b536f5b73..236500d011a7 100644
--- a/liboffloadmic/runtime/ofldbegin.cpp
+++ b/liboffloadmic/runtime/ofldbegin.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -29,7 +29,7 @@
#if HOST_LIBRARY
-#include "offload_host.h"
+#include "offload_table.h"
#include "offload_myo_host.h"
#else
#include "compiler_if_target.h"
@@ -37,6 +37,14 @@
#include "offload_myo_target.h"
#endif
+// Initializes library and registers specified offload image.
+// Don't use the declarations from offload_host.h, because offload_table.h
+// is included instead of it. Including offload_host.h conflicts with the
+// STL library compiled with VS2010.
+extern "C" bool __offload_register_image(const void* image);
+extern "C" void __offload_unregister_image(const void* image);
+extern "C" bool __offload_target_image_is_executable(const void *image);
+
#ifdef TARGET_WINNT
#define ALLOCATE(name) __declspec(allocate(name))
#define DLL_LOCAL
@@ -110,33 +118,127 @@ static VarList::Node __offload_var_node = {
#ifdef MYO_SUPPORT
// offload myo shared var section prolog
+// first element is empty
ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_START)
#ifdef TARGET_WINNT
__declspec(align(sizeof(SharedTableEntry)))
#endif // TARGET_WINNT
-static SharedTableEntry __offload_myo_shared_table_start = { 0 };
+static MYOVarTable::Entry __offload_myo_shared_var_start = { 0 };
+
+// list element for the current module
+// table entry pointer skips the empty first entry
+static MYOVarTableList::Node __offload_myo_shared_var_node = {
+ { &__offload_myo_shared_var_start + 1 },
+ 0, 0
+};
+
+// offload myo shared vtable section prolog
+// first element is empty
+ALLOCATE(OFFLOAD_MYO_SHARED_VTABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+#endif // TARGET_WINNT
+static MYOVarTable::Entry __offload_myo_shared_vtable_start = { 0 };
+
+// list element for the current module
+// table entry pointer skips the empty first entry
+static MYOVarTableList::Node __offload_myo_shared_vtable_node = {
+ { &__offload_myo_shared_vtable_start + 1 },
+ 0, 0
+};
-#if HOST_LIBRARY
// offload myo shared var init section prolog
+// first element is empty
ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START)
#ifdef TARGET_WINNT
__declspec(align(sizeof(InitTableEntry)))
#endif // TARGET_WINNT
-static InitTableEntry __offload_myo_shared_init_table_start = { 0 };
+static MYOInitTable::Entry __offload_myo_init_table_start = { 0 };
+
+// list element for the current module
+// table entry pointer skips the empty first entry
+static MYOInitTableList::Node __offload_myo_init_table_node = {
+ { &__offload_myo_init_table_start + 1 },
+ 0, 0
+};
+
+// The functions and variables needed for a built-in
+// remote function entry for vtable initialization on MIC
+
+#if !HOST_LIBRARY
+MyoError __offload_init_vtables(void) // compiled only when HOST_LIBRARY is false; always returns MYO_SUCCESS
+{
+ SharedTableEntry *t_start;
+
+ //OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+ t_start = &__offload_myo_shared_vtable_start + 1; // skip the empty first entry of the vtable section
+ //OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, t_start);
+ while (t_start->varName != 0) { // walk entries until one with a null varName
+ //OFFLOAD_DEBUG_TRACE(4,
+ // "myo shared vtable \"%s\" &myo_ptr = %p myo_ptr = %p\n",
+ // t_start->varName,
+ // (void *)(t_start->sharedAddr),
+ // ((void **)(t_start->sharedAddr))[0]);
+ t_start++; // the trace above is commented out, so the loop only advances the cursor
+ }
+
+ __offload_myo_shared_init_table_process(
+ &__offload_myo_init_table_start + 1); // again skipping the empty first entry
+ return MYO_SUCCESS;
+}
+#endif // !HOST_LIBRARY
+
+static void vtable_initializer() // empty body; its address is stored in __offload_myo_fptr_table_start
+{
+}
+
+#if !HOST_LIBRARY
+static MyoError vtable_initializer_wrapper() // runs __offload_init_vtables between MYO acquire and release
+{
+ __offload_myoAcquire();
+ __offload_init_vtables(); // its MyoError result is ignored
+ __offload_myoRelease();
+ return MYO_SUCCESS;
+}
#endif
+static void* __offload_vtable_initializer_thunk_ptr = 0;
+
// offload myo fptr section prolog
+// first element is pre-initialized to the MIC vtable initializer
ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_START)
#ifdef TARGET_WINNT
__declspec(align(sizeof(FptrTableEntry)))
#endif // TARGET_WINNT
-static FptrTableEntry __offload_myo_fptr_table_start = { 0 };
+static MYOFuncTable::Entry __offload_myo_fptr_table_start = {
+#if HOST_LIBRARY
+ "--vtable_initializer--",
+ (void*)&vtable_initializer,
+ (void*)&__offload_vtable_initializer_thunk_ptr,
+#ifdef TARGET_WINNT
+ // Dummy to pad up to 32 bytes
+ 0
+#endif // TARGET_WINNT
+#else // HOST_LIBRARY
+ "--vtable_initializer--",
+ (void*)&vtable_initializer,
+ (void*)&vtable_initializer_wrapper,
+ &__offload_vtable_initializer_thunk_ptr,
+#endif // HOST_LIBRARY
+};
+
+// list element for the current module
+static MYOFuncTableList::Node __offload_myo_fptr_table_node = {
+ { &__offload_myo_fptr_table_start },
+ 0, 0
+};
#endif // MYO_SUPPORT
// init/fini code which adds/removes local lookup data to/from the global list
static void offload_fini();
+static void offload_fini_so();
#ifndef TARGET_WINNT
static void offload_init() __attribute__((constructor(101)));
@@ -150,35 +252,81 @@ static void (*addressof_offload_init)() = offload_init;
static void offload_init()
{
+ bool success; // assigned only inside the HOST_LIBRARY branch below
+
// register offload tables
__offload_register_tables(&__offload_entry_node,
&__offload_func_node,
&__offload_var_node);
#if HOST_LIBRARY
- __offload_register_image(&__offload_target_image);
- atexit(offload_fini);
+ success = __offload_register_image(&__offload_target_image);
+ if (!success)
+ {
+ return; // image registration failed: skip the MYO table processing below
+ }
#endif // HOST_LIBRARY
-
#ifdef MYO_SUPPORT
- __offload_myoRegisterTables(
#if HOST_LIBRARY
- &__offload_myo_shared_init_table_start + 1,
-#endif // HOST_LIBRARY
- &__offload_myo_shared_table_start + 1,
- &__offload_myo_fptr_table_start + 1
+ // If this is the main program, register the main atexit routine (offload_fini)
+ if (__offload_myoProcessTables(
+ &__offload_target_image,
+ &__offload_myo_init_table_node,
+ &__offload_myo_shared_var_node,
+ &__offload_myo_shared_vtable_node,
+ &__offload_myo_fptr_table_node))
+ {
+ atexit(offload_fini); // NOTE(review): now registered only when MYO_SUPPORT is defined - confirm intended
+#ifdef TARGET_WINNT
+ } else {
+ atexit(offload_fini_so); // non-Windows builds declare offload_fini_so with __attribute__((destructor(101))) instead
+#endif
+ }
+#else // HOST_LIBRARY
+ __offload_myoProcessTables(
+ &__offload_myo_init_table_start + 1,
+ &__offload_myo_shared_var_start + 1,
+ &__offload_myo_shared_vtable_start + 1,
+ &__offload_myo_fptr_table_start // no +1: the first fptr entry is the pre-initialized vtable initializer
);
+#endif // HOST_LIBRARY
#endif // MYO_SUPPORT
}
+#ifndef TARGET_WINNT
+static void offload_fini_so() __attribute__((destructor(101)));
+#else // TARGET_WINNT
+static void offload_init_so();
+#endif // TARGET_WINNT
+
static void offload_fini()
{
#if HOST_LIBRARY
__offload_unregister_image(&__offload_target_image);
#endif // HOST_LIBRARY
+} // offload_fini now ends here; offload table unregistration is done in offload_fini_so
- // unregister offload tables
+static void offload_fini_so()
+{
+ // The offload and MYO tables must be removed from the list
+ // to prevent invalid accesses after dlclose().
+ // Remove the offload tables.
__offload_unregister_tables(&__offload_entry_node,
&__offload_func_node,
&__offload_var_node);
+#if HOST_LIBRARY
+ if(!__offload_target_image_is_executable(&__offload_target_image)) { // presumably an executable's image is left to offload_fini - confirm
+ __offload_unregister_image(&__offload_target_image);
+ }
+#endif
+#ifdef MYO_SUPPORT
+#if HOST_LIBRARY
+ // Remove the MYO tables (the same four list nodes passed to __offload_myoProcessTables)
+ __offload_myoRemoveTables(
+ &__offload_myo_init_table_node,
+ &__offload_myo_shared_var_node,
+ &__offload_myo_shared_vtable_node,
+ &__offload_myo_fptr_table_node);
+#endif // HOST_LIBRARY
+#endif // MYO_SUPPORT
}
diff --git a/liboffloadmic/runtime/ofldend.cpp b/liboffloadmic/runtime/ofldend.cpp
index 0256c5a0f209..1e522b7f2fa0 100644
--- a/liboffloadmic/runtime/ofldend.cpp
+++ b/liboffloadmic/runtime/ofldend.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -29,7 +29,7 @@
#if HOST_LIBRARY
-#include "offload_host.h"
+#include "offload_table.h"
#include "offload_myo_host.h"
#else
#include "offload_target.h"
@@ -69,29 +69,42 @@ static VarTable::Entry __offload_var_table_end = { (const char*)-1 };
ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_END)
#ifdef TARGET_WINNT
__declspec(align(sizeof(SharedTableEntry)))
-static SharedTableEntry __offload_myo_shared_table_end = { (const char*)-1, 0 };
+static MYOVarTable::Entry __offload_myo_shared_var_end =
+ { (const char*)-1, 0 };
#else // TARGET_WINNT
-static SharedTableEntry __offload_myo_shared_table_end = { 0 };
+static MYOVarTable::Entry __offload_myo_shared_var_end = { 0 };
#endif // TARGET_WINNT
-#if HOST_LIBRARY
+// offload myo shared vtable section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_VTABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+static MYOVarTable::Entry __offload_myo_shared_vtable_end =
+ { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static MYOVarTable::Entry __offload_myo_shared_vtable_end = { 0 };
+#endif // TARGET_WINNT
+
+//#if HOST_LIBRARY
// offload myo shared var init section epilog
ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END)
#ifdef TARGET_WINNT
__declspec(align(sizeof(InitTableEntry)))
-static InitTableEntry __offload_myo_shared_init_table_end = { (const char*)-1, 0 };
+static MYOInitTable::Entry __offload_myo_init_table_end =
+ { (const char*)-1, 0 };
#else // TARGET_WINNT
-static InitTableEntry __offload_myo_shared_init_table_end = { 0 };
+static MYOInitTable::Entry __offload_myo_init_table_end = { 0 };
#endif // TARGET_WINNT
-#endif // HOST_LIBRARY
+//#endif // HOST_LIBRARY
// offload myo fptr section epilog
ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_END)
#ifdef TARGET_WINNT
__declspec(align(sizeof(FptrTableEntry)))
-static FptrTableEntry __offload_myo_fptr_table_end = { (const char*)-1, 0, 0 };
+static MYOFuncTable::Entry __offload_myo_fptr_table_end =
+ { (const char*)-1, 0, 0 };
#else // TARGET_WINNT
-static FptrTableEntry __offload_myo_fptr_table_end = { 0 };
+static MYOFuncTable::Entry __offload_myo_fptr_table_end = { 0 };
#endif // TARGET_WINNT
#endif // MYO_SUPPORT
diff --git a/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
index b629a1a91b50..53acea154de4 100644
--- a/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
+++ b/liboffloadmic/runtime/orsl-lite/include/orsl-lite.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
index af01c119c35e..add67a0b0ca0 100644
--- a/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
+++ b/liboffloadmic/runtime/orsl-lite/lib/orsl-lite.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/liboffloadmic/runtime/use_mpss2.txt b/liboffloadmic/runtime/use_mpss2.txt
index 948f4838fbfb..47b322c971c3 100644
--- a/liboffloadmic/runtime/use_mpss2.txt
+++ b/liboffloadmic/runtime/use_mpss2.txt
@@ -1 +1 @@
-2.1.6720-13
+3.4.1