diff --git a/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116 --- /dev/null +++ b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md @@ -0,0 +1,10 @@ +--------- +README +--------- +some scripts use the following notation to find libaparapi_x86_64.so => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X] +Where X stands for platform type name: std, fpga etc. +build and copy the X version of libaparapi_x86_64.so to this folder so scripts can pick it up: +goto: com.amd.aparapi.jni +run: ant -f build_X_.xml +copy: dist/libaparapi_x86_64.so dist.X + diff --git a/src/aparapi/com.amd.aparapi.jni/dist.std/README.md b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116 --- /dev/null +++ b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md @@ -0,0 +1,10 @@ +--------- +README +--------- +some scripts use the following notation to find libaparapi_x86_64.so => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X] +Where X stands for platform type name: std, fpga etc. 
+build and copy the X version of libaparapi_x86_64.so to this folder so scripts can pick it up: +goto: com.amd.aparapi.jni +run: ant -f build_X_.xml +copy: dist/libaparapi_x86_64.so dist.X + diff --git a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so index b54beb2c36c3d06d6747102c2a59023f077791f6..95d77cecc323704ccb9f71fdb6fdbb71b16fdbfe 100755 Binary files a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so and b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so differ diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h new file mode 100644 index 0000000000000000000000000000000000000000..30b2ec599b5bfd797767784db5146dc0ede8033e --- /dev/null +++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h @@ -0,0 +1,13 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class com_amd_aparapi_Kernel_FlowType */ + +#ifndef _Included_com_amd_aparapi_Kernel_FlowType +#define _Included_com_amd_aparapi_Kernel_FlowType +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h index 8de80a9844444d079086e7674c634aa936688d44..5a4f770f9a1bdac9398c4c2a4c40bb0131a2db64 100644 --- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h +++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h @@ -10,7 +10,7 @@ extern "C" { #undef com_amd_aparapi_Range_THREADS_PER_CORE #define com_amd_aparapi_Range_THREADS_PER_CORE 16L #undef com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE -#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 256L +#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 1024L #ifdef __cplusplus } #endif diff --git 
a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h index 555f91426e90e4f115ba50a36bdb2132fe2c7532..6791475b00edd9bff51e1b85aab0114f4146b019 100644 --- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h +++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h @@ -51,6 +51,12 @@ extern "C" { #define com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_STATIC 4194304L #undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU #define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU 4L +#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW +#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW 1L +#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW +#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW 2L +#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW +#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW 4L /* * Class: com_amd_aparapi_internal_jni_KernelRunnerJNI * Method: initJNI @@ -70,10 +76,10 @@ JNIEXPORT jint JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_getJNI /* * Class: com_amd_aparapi_internal_jni_KernelRunnerJNI * Method: buildProgramJNI - * Signature: (JLjava/lang/String;)J + * Signature: (JLjava/lang/String;I)J */ JNIEXPORT jlong JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_buildProgramJNI - (JNIEnv *, jobject, jlong, jstring); + (JNIEnv *, jobject, jlong, jstring, jint); /* * Class: com_amd_aparapi_internal_jni_KernelRunnerJNI diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h index 
b9f11d7f81c45c1c24027e6723d40a0310abae83..2819cc7d35fcfd374c54f71d439e6d1627547b11 100644 --- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h +++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h @@ -51,6 +51,12 @@ extern "C" { #define com_amd_aparapi_internal_kernel_KernelRunner_ARG_STATIC 4194304L #undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU #define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU 4L +#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW +#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW 1L +#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW +#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW 2L +#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW +#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW 4L #ifdef __cplusplus } #endif diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp index a4a49cf8bad22f0cb71ac0b46932e3934b8f45ac..71f36615cb6ef859811a77248c48cce45fc03b27 100644 --- a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp +++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp @@ -336,7 +336,8 @@ JNI_JAVA(void, OpenCLJNI, invoke) } JNI_JAVA(jobject, OpenCLJNI, getPlatforms) - (JNIEnv *jenv, jobject jobj) { + (JNIEnv *jenv, jobject jobj) + { jobject platformListInstance = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn); cl_int status = CL_SUCCESS; cl_uint platformc; @@ -352,43 +353,23 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms) status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VERSION, sizeof(platformVersionName), platformVersionName, NULL); fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName); - // fix this so OpenCL 1.3 or higher will 
not break! - if ( -#ifdef ALTERA_OPENCL - 1 // !!! oren fix for bad platform version check - || -#endif - !strncmp(platformVersionName, "OpenCL 1.2", 10) - || !strncmp(platformVersionName, "OpenCL 1.1", 10) -#ifdef __APPLE__ - || !strncmp(platformVersionName, "OpenCL 1.0", 10) -#endif - ) - { - char platformVendorName[512]; - char platformName[512]; - status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL); - status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL); - fprintf(stderr, "platform vendor %d %s\n", platformIdx, platformVendorName); - fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName); - jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ), + char platformVendorName[512]; + char platformName[512]; + status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL); + status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL); + fprintf(stderr, "platform vendor %d %s\n", platformIdx, platformVendorName); + fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName); + jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ), (jlong)platformIds[platformIdx], jenv->NewStringUTF(platformVersionName), jenv->NewStringUTF(platformVendorName), jenv->NewStringUTF(platformName) ); - JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance); - - cl_uint deviceIdc; - // !!! 
oren fix - detect accelerators as well, they forgot to add the CL_DEVICE_TYPE_ACCELERATOR -#ifndef ALTERA_OPENCL - cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ; -#else - //cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR; - // Altera OpenCL fails if this is different then = CL_DEVICE_TYPE_ACCELERATOR - cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR; -#endif - status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc); + JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance); + + cl_uint deviceIdc; + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ALL; + status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc); if (status == CL_SUCCESS && deviceIdc > 0 ){ fprintf(stderr, "found %d devices\n", deviceIdc); cl_device_id* deviceIds = new cl_device_id[deviceIdc]; @@ -476,7 +457,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms) } } } - } + //} return (platformListInstance); } diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp index 205c425853f584fbe0b29da677a87bd0775b0821..e9e5101e51cf08e3ea5fa73062ee6d2649220d33 100644 --- a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp +++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp @@ -52,6 +52,8 @@ #include <algorithm> //#include <string> +// !!! oren change -> +#include "ConfigSettings.h" //compiler dependant code /** @@ -1152,9 +1154,9 @@ inline char* getClassName(JNIEnv* jenv, JNIContext* jniContext, const char *optE // !!! Java adds '$' chars to inner class names so replace them with '.' 
char *charPtr = classNameStr; - while(charPtr=strchr(charPtr,'$')) + while(charPtr = strchr(charPtr,'$')) { - *charPtr='.'; + *charPtr = BINARY_FILE_SEP; charPtr++; } @@ -1285,46 +1287,57 @@ inline void outputOCLFile(JNIEnv* jenv, JNIContext* jniContext, const char *sour } +inline void verifyFlow(jint &buildFlags) +{ + // verify flow support is available + if(!(PLATFORM_FLOW_SUPPORT & buildFlags)) + { + fprintf(stderr, "!!! Error requested flow(%0xd) not available !!!\n",buildFlags); + throw CLException(CL_INVALID_VALUE,"buildProgramJNI() -> bad request flow"); + } + + // check/set if default flow is requested + if(buildFlags==DEFAULT_FLOW) + buildFlags = PLATFORM_DEFAULT_FLOW; +} JNI_JAVA(jlong, KernelRunnerJNI, buildProgramJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) { + (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source, jint buildFlags) { JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); if (jniContext == NULL){ return 0; } try { - cl_int status = CL_SUCCESS; - -#ifdef ALTERA_OPENCL -#define OUTPUT_OCL_FILE -#define USE_BINARY_FILE -#define BINARY_FILE_EXT ".aocx" -#endif - -// allows defining an alternative folder where bin files should be loaded from -// Usefull when running in aparapi embeded mode -#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER" + cl_int status = CL_SUCCESS; const char *sourceChars = jenv->GetStringUTFChars(source, NULL); -//#ifdef OUTPUT_OCL_FILE - outputOCLFile(jenv,jniContext,sourceChars); -//#endif - -#ifdef USE_BINARY_FILE - char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR); - fprintf(stderr, "Bin Folder is %s\n",binFileFolder); - char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT); - char *fullBinFilePath = buildFilePath(binFileFolder,binFileName); - fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath); - jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context, 1, &jniContext->deviceId, fullBinFilePath, NULL, 
&status); - delete []binFileName; - delete []fullBinFilePath; -#else - jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context, 1, &jniContext->deviceId, sourceChars, NULL, &status); +#ifdef OUTPUT_OCL_FILE + outputOCLFile(jenv,jniContext,sourceChars); #endif + // !!! oren change -> + // verify the flow and modify if need be + verifyFlow(buildFlags); + +//#ifdef USE_BINARY_FILE + if(buildFlags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW) + { + char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR); + fprintf(stderr, "Bin Folder is %s\n",binFileFolder); + char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT); + char *fullBinFilePath = buildFilePath(binFileFolder,binFileName); + fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath); + jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context, 1, &jniContext->deviceId, fullBinFilePath, NULL, &status); + delete []binFileName; + delete []fullBinFilePath; + } +//#else + else + jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context, 1, &jniContext->deviceId, sourceChars, NULL, &status); +//#endif + jenv->ReleaseStringUTFChars(source, sourceChars); if(status == CL_BUILD_PROGRAM_FAILURE) throw CLException(status, ""); diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h new file mode 100644 index 0000000000000000000000000000000000000000..eb7063b19a26264aeb36dbd44502477a95fbcc8c --- /dev/null +++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h @@ -0,0 +1,58 @@ +#ifndef CONFIG_SETTINGS_H +#define CONFIG_SETTINGS_H + +// !!! 
oren changes -> +// configuration settings for building platform specific code +// TODO: consider moving parts of this to a configuration file later on and load settings dynamically + +// use values from JNI config +#include "com_amd_aparapi_internal_jni_KernelRunnerJNI.h" + + +// auto output kernel.cl file +#define OUTPUT_OCL_FILE +// allows defining an alternative folder where bin files should be loaded from +// Useful when running in Aparapi embedded mode +#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER" + +/////////////////////////// +// help determine if platform supports source/binary flows +/////////////////////////// +#define SOURCE_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW +#define BINARY_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW +#define DEFAULT_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW +/////////////////////////// + + +/////////////////////////// +// define platform settings +////////////////////////// +// BINARY_FILE_EXT => define binary file extension +// BINARY_FILE_SEP => define binary file separator, replaces java's $ signs in file names -> examples: .,_ etc. +/////////////////////////// +// Altera platform specific +/////////////////////////// +#ifdef ALTERA_OPENCL + #define PLATFORM_FLOW_SUPPORT BINARY_FLOW + #define PLATFORM_DEFAULT_FLOW BINARY_FLOW + #define BINARY_FILE_EXT ".aocx" + #define BINARY_FILE_SEP '.' +#elif AMD_OPENCL +// AMD specific +#elif INTEL_OPENCL +// Intel specific +#elif NVIDIA_OPENCL +// NVidia specific +#else // default settings +/////////////////////////// +// All other platforms - set the default for other platforms +/////////////////////////// + #define PLATFORM_FLOW_SUPPORT (BINARY_FLOW | SOURCE_FLOW) + #define PLATFORM_DEFAULT_FLOW SOURCE_FLOW + #define BINARY_FILE_EXT ".bcl" + #define BINARY_FILE_SEP '.' 
+#endif // ALTERA_OPENCL + +#endif // CONFIG_SETTINGS_H + + diff --git a/src/aparapi/com.amd.aparapi/dist/aparapi.jar b/src/aparapi/com.amd.aparapi/dist/aparapi.jar index 7e1bc57f0d7e26156e6950c36dbb6b8f273faf72..0be74ef231d313a2b694f0ca651bdb8c5caf6e2b 100644 Binary files a/src/aparapi/com.amd.aparapi/dist/aparapi.jar and b/src/aparapi/com.amd.aparapi/dist/aparapi.jar differ diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java index da578847e0cda8341403d573bd67ab2401e5dd18..004ee360bc5b44717009635a969994fdfd054722 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java @@ -93,7 +93,16 @@ public class Config extends ConfigJNI{ * */ public static final String platformHint = System.getProperty(propPkgName + ".platformHint"); - + + //!!! oren change 7.15.15 -> allow choosing a flow type + /** + * Allows the user to select a flow type + * + * Usage -Dcom.amd.aparapi.flowType={binary|source|default} + * + */ + public static final String flowType = System.getProperty(propPkgName + ".flowType"); + /** * Allows the user to request that the execution mode of each kernel invocation be reported to stdout. * @@ -204,6 +213,8 @@ public class Config extends ConfigJNI{ System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV); // !!! 
oren change System.out.println(propPkgName + ".profilingFileNameFormatStr{format str}=" + profilingFileNameFormatStr); + System.out.println(propPkgName + ".flowType{source|binary|default}=" + flowType); + ////////////////// System.out.println(propPkgName + ".enableVerboseJNI{true|false}=" + enableVerboseJNI); System.out.println(propPkgName + ".enableVerboseJNIOpenCLResourceTracking{true|false}=" + enableVerboseJNIOpenCLResourceTracking); diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java index 900b344042d8e9ccf6209f0aa2da91e9142031fc..2d11bcce22f037ca2f3653ff72d0588de4303590 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java @@ -405,6 +405,80 @@ public abstract class Kernel implements Cloneable { } }; + //////////////////// + // !!! oren change -> add source/binary flow support to kernel + //////////////////// + public static enum FlowType + { + // flow type list + SOURCE(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_SOURCE_FLOW), + BINARY(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_BINARY_FLOW), + DEFAULT(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_DEFAULT_FLOW); + + // data store + int flowType; + + FlowType(int flowType) + { + setValue(flowType); + } + + FlowType(String flowTypeStr) + { + this.flowType = strToFlowType(flowTypeStr).getValue(); + } + + public int getValue() + { + return this.flowType; + } + + private void setValue(int flowType) + { + this.flowType = flowType; + } + + public static FlowType getDefaultFlowType() + { + // if set by user try get value else set to default + FlowType flowType = (Config.flowType==null) ? 
DEFAULT : strToFlowType(Config.flowType); + return flowType; + } + + public static FlowType strToFlowType(final String flowTypeStr) + { + try + { + FlowType flowType = valueOf(flowTypeStr.toUpperCase()); + return flowType; + } + catch (Exception e) + { + logger.info("!!! bad flow type => (" + flowTypeStr + ") => reverting to default platform flow!"); + throw e; + } + } + + } + + public FlowType getFlowType() { + return kernelFlowType; + } + + + public void setFlowType(FlowType kernelFlowType) { + this.kernelFlowType = kernelFlowType; + } + + public void setFlowType(String flowTypeStr) { + this.kernelFlowType = FlowType.strToFlowType(flowTypeStr); + } + + private FlowType kernelFlowType = FlowType.getDefaultFlowType(); + + + //////////////////// + private KernelRunner kernelRunner = null; private KernelState kernelState = new KernelState(); @@ -616,7 +690,8 @@ public abstract class Kernel implements Cloneable { return getGlobalId(0); } - @OpenCLDelegate + +@OpenCLDelegate protected final int getGlobalId(int _dim) { return kernelState.getGlobalIds()[_dim]; } @@ -2816,4 +2891,5 @@ public abstract class Kernel implements Cloneable { executionMode = currentMode.next(); } } + } diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java index 06fd2c6377c2cc959587e573cdc32bc5048d31be..34e3a4b5ac1e6a33db58640df381c7ee2daa501f 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java @@ -51,7 +51,11 @@ public class Range extends RangeJNI{ public static final int THREADS_PER_CORE = 16; - public static final int MAX_OPENCL_GROUP_SIZE = 256; + // !!! oren change -> this value looks out dated and the mechanism probably needs revisiting !!! 
+ // we already see evidence of improved performance for size==1024 on certain devices (ref: FPGA doc classification paper) + // for now we set it to 4X original value, but we should think about it more... + //public static final int MAX_OPENCL_GROUP_SIZE = 256; + public static final int MAX_OPENCL_GROUP_SIZE = 1024; public static final int MAX_GROUP_SIZE = Math.max(Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE, MAX_OPENCL_GROUP_SIZE); @@ -113,11 +117,18 @@ public class Range extends RangeJNI{ */ private static int[] getFactors(int _value, int _max) { - final int factors[] = new int[MAX_GROUP_SIZE]; + //final int factors[] = new int[MAX_GROUP_SIZE]; int factorIdx = 0; - for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) { - if ((_value % possibleFactor) == 0) { + // !!! oren bug fix -> based on poz findings + // max can not be bigger then value and if factorIdx >= MAX_GROUP_SIZE we will have an access violation + final int GroupSizeLimit = Math.min(Math.min(_max,_value),MAX_GROUP_SIZE); + final int factors[] = new int[GroupSizeLimit]; + //for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) + for (int possibleFactor = 1; possibleFactor <= GroupSizeLimit; possibleFactor++) + { + if ((_value % possibleFactor) == 0) + { factors[factorIdx++] = possibleFactor; } } diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java index 7fa7ec4c59e3ce4d44070e2aca4cfdb40ccf0bd3..f1c2f68e09c0d578956ed0b43c74b6360a368ea8 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java @@ -1,8 +1,11 @@ package com.amd.aparapi.device; +import java.util.List; + import com.amd.aparapi.Range; import com.amd.aparapi.device.OpenCLDevice.DeviceComparitor; import com.amd.aparapi.device.OpenCLDevice.DeviceSelector; +import 
com.amd.aparapi.internal.opencl.OpenCLPlatform; public abstract class Device{ @@ -14,6 +17,98 @@ public abstract class Device{ JTP, SEQ }; + + // !!! oren change -> get device using the tuple (platform, deviceType, id) + + public static Device getDevice(String platformName, Device.TYPE deviceType, int deviceId) + { + return getDevice(platformName,deviceType.name(),deviceId); + } + + // get first available device + + public static Device getDevice(String platformName, Device.TYPE deviceType) + { + return getDevice(platformName,deviceType.name(),0); + } + + public static Device getDevice(String platformName, String deviceTypeName) + { + return getDevice(platformName,deviceTypeName,0); + } + + public static Device getDevice(String platformName, String deviceTypeName, int deviceId) + { + List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms(); + + int platformc = 0; + for (OpenCLPlatform platform : platforms) + { + if(platform.getName().contains(platformName)) + { + + System.out.println("Platform " + platformc + "{"); + + System.out.println(" Name : \"" + platform.getName() + "\""); + + System.out.println(" Vendor : \"" + platform.getVendor() + "\""); + + System.out.println(" Version : \"" + platform.getVersion() + "\""); + + List<OpenCLDevice> devices = platform.getOpenCLDevices(); + + System.out.println(" Platform contains " + devices.size() + " OpenCL devices"); + + int devicec = 0; + + for (OpenCLDevice device : devices) + { + if( device.getType().name().equalsIgnoreCase(deviceTypeName)) + { + + System.out.println(" Device " + devicec + "{"); + + System.out.println(" Type : " + device.getType()); + + System.out.println(" GlobalMemSize : " + device.getGlobalMemSize()); + + System.out.println(" LocalMemSize : " + device.getLocalMemSize()); + + System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits()); + + System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize()); + + System.out.println(" MaxWorkItemDimensions : " + 
device.getMaxWorkItemDimensions()); + + System.out.println(" }"); + + if(deviceId>0 && (devicec!=deviceId)) + { + System.out.println("!!! devicec!=deviceId(" + deviceId + ") => continue search !!!"); + continue; + } + + // close platform bracket + System.out.println("}"); + + return device; + } + + devicec++; + } + System.out.println("Device type/id combination not found"); + + System.out.println("}"); + + platformc++; + + } + + } + // return not found !!! + return null; + } + public static Device best() { return (OpenCLDevice.select(new DeviceComparitor(){ diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java index a672093fe97114a2e1fe4d5ed1fc63c73bb7e5bf..da60ff634120a890eeeebf3a6315e69de3c7e395 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java @@ -276,6 +276,19 @@ public abstract class KernelRunnerJNI{ */ // @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4; + /** !!! oren change -> + * These flags indicate that we want to build source/binary i.e. use source/binary flow. 
+ * + * Be careful changing final constants starting with JNI.<br/> + * + * @see com.amd.aparapi.internal.annotation.UsedByJNICode + * + * @author oren + */ + @UsedByJNICode public static final int JNI_FLAG_SOURCE_FLOW = 1 << 0; + @UsedByJNICode public static final int JNI_FLAG_BINARY_FLOW = 1 << 1; + @UsedByJNICode public static final int JNI_FLAG_DEFAULT_FLOW = 1 << 2; + /* * Native methods */ @@ -295,7 +308,9 @@ public abstract class KernelRunnerJNI{ protected native int getJNI(long _jniContextHandle, Object _array); - protected native long buildProgramJNI(long _jniContextHandle, String _source); + //protected native long buildProgramJNI(long _jniContextHandle, String _source); + // !!! oren change -> add binary option to build + protected native long buildProgramJNI(long _jniContextHandle, String _source, int _buildFlags); protected native int setArgsJNI(long _jniContextHandle, KernelArgJNI[] _args, int argc); diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java index af7b5afa9332c896407951c1259a02deeefc4b59..c8578a9bc6ccaac8631a2e36cb2fbec19826b29c 100644 --- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java +++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java @@ -1047,8 +1047,11 @@ public class KernelRunner extends KernelRunnerJNI{ e1.printStackTrace(); } */ + // !!! 
oren change -> support flow types + // set flow type + int buildFlags = kernel.getFlowType().getValue(); // Send the string to OpenCL to compile it - if (buildProgramJNI(jniContextHandle, openCL) == 0) { + if (buildProgramJNI(jniContextHandle, openCL,buildFlags) == 0) { return warnFallBackAndExecute(_entrypointName, _range, _passes, "OpenCL compile failed"); } diff --git a/src/aparapi/samples/add/add.jar b/src/aparapi/samples/add/add.jar index bfdfc338d748f2d47dce6d35f6641722bb0f35c4..d3b27be0a3a87fa33d23810463f0b115fb033af0 100644 Binary files a/src/aparapi/samples/add/add.jar and b/src/aparapi/samples/add/add.jar differ diff --git a/src/aparapi/samples/add/selectPlatform.sh b/src/aparapi/samples/add/selectPlatform.sh new file mode 100644 index 0000000000000000000000000000000000000000..386ea6e808ac110d0d514f39957587451f59291d --- /dev/null +++ b/src/aparapi/samples/add/selectPlatform.sh @@ -0,0 +1,42 @@ + +# +# select platform and flow test script. +# runs add kernel on diffrent platforms/flows. +# +# $1 = platformHint [AMD,Altera,Intel,NVidia] +# $2 = deviceType [CPU,CPU,ACC] +# $3 = deviceId [0=first available device..n=available device] +# $4 = flowTypeStr [source,binary,default] +# can use -Dcom.amd.aparapi.flowType instead +# $5 = dist.[std,fpga] to load -> !!! 
note -> remember to build and copy libaparapi_x86_64.so to the correct place +# example: cp ../../com.amd.aparapi.jni/dist/libaparapi_x86_64.so ../../com.amd.aparapi.jni/dist.std/ +# Alternatively use a script verion below without $5 -> +# -Djava.library.path=../../com.amd.aparapi.jni/dist \ + +# usage examples: +# sh selectPlatform.sh AMD CPU 0 source std +# sh selectPlatform.sh Altera ACC 0 binary fpga + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.$5 \ + -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \ + com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3 $4 + +# +# other script variants uncomment to use -> +# + +# script version with libaparapi_x86_64.so in dist +#java \ +# -Djava.library.path=../../com.amd.aparapi.jni/dist \ +# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \ +# com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3 + +# script version with flow type set through config +#java \ +# -Djava.library.path=../../com.amd.aparapi.jni/dist.$5 \ +# -Dcom.amd.aparapi.flowType=$4 \ +# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \ +# com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3 + + diff --git a/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java new file mode 100644 index 0000000000000000000000000000000000000000..053483ef860404f82a66a9874f93dd513f6123d7 --- /dev/null +++ b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java @@ -0,0 +1,277 @@ +/* + +Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + +All rights reserved. + + + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + +following conditions are met: + + + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following + +disclaimer. 
+ + + +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + +disclaimer in the documentation and/or other materials provided with the distribution. + + + +Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + +derived from this software without specific prior written permission. + + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + +laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through + +774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, + +you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of + +Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. 
Export Administration

Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,

E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups

D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject

to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774

of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations

under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/.



*/



package com.amd.aparapi.sample.add;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;

import com.amd.aparapi.Kernel;
import com.amd.aparapi.Range;
import com.amd.aparapi.device.Device;
import com.amd.aparapi.device.OpenCLDevice;
import com.amd.aparapi.internal.opencl.OpenCLPlatform;

/**
 * Sample that adds two float arrays element-wise with an Aparapi {@link Kernel},
 * running on a device selected from the command line.
 *
 * <p>Before running, it prints every OpenCL platform and device it can find so
 * the user can see which platform hint / device type / device id to pass.
 */
public class MainSelectPlatform {

   /** Number of result rows printed after the kernel has run. */
   private static final int MAX_DISPLAYED_RESULTS = 20;

   /**
    * Prints every OpenCL platform returned by {@link OpenCLPlatform#getOpenCLPlatforms()}
    * and, for each platform, the properties of each of its devices, to standard output.
    */
   public static void listPlatformsAndDevices() {
      final List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms();

      for (int platformIndex = 0; platformIndex < platforms.size(); platformIndex++) {
         final OpenCLPlatform platform = platforms.get(platformIndex);

         System.out.println("Platform " + platformIndex + "{");
         System.out.println(" Name : \"" + platform.getName() + "\"");
         System.out.println(" Vendor : \"" + platform.getVendor() + "\"");
         System.out.println(" Version : \"" + platform.getVersion() + "\"");

         final List<OpenCLDevice> devices = platform.getOpenCLDevices();
         System.out.println(" Platform contains " + devices.size() + " OpenCL devices");

         for (int deviceIndex = 0; deviceIndex < devices.size(); deviceIndex++) {
            final OpenCLDevice device = devices.get(deviceIndex);

            System.out.println(" Device " + deviceIndex + "{");
            System.out.println(" Type : " + device.getType());
            System.out.println(" GlobalMemSize : " + device.getGlobalMemSize());
            System.out.println(" LocalMemSize : " + device.getLocalMemSize());
            System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits());
            System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize());
            System.out.println(" MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions());
            System.out.println(" }");
         }

         // close platform bracket
         System.out.println("}");
      }
   }

   /** Prints the command-line usage banner, including the optional arguments. */
   private static void printUsage() {
      System.out.printf("****************\n");
      System.out.printf("Usage is: select platformHint deviceType [deviceId] [flowType]\n");
      System.out.printf("****************\n");
   }

   /**
    * Entry point.
    *
    * @param args {@code args[0]} platform hint and {@code args[1]} device type (both required);
    *             {@code args[2]} optional integer device id (defaults to 0);
    *             {@code args[3]} optional flow type string handed to {@code Kernel.setFlowType}
    */
   public static void main(String[] args) {

      // Enumerate first so the user sees the valid choices even when the arguments are wrong.
      System.out.printf("**** listPlatformsAndDevices ****\n");
      listPlatformsAndDevices();
      System.out.printf("****************\n");

      // Validate the arguments before allocating the work buffers.
      if (args.length < 2) {
         printUsage();
         return;
      }

      final String platformHint = args[0];
      final String deviceType = args[1];

      int deviceId = 0;
      if (args.length > 2) {
         try {
            deviceId = Integer.parseInt(args[2]);
         } catch (NumberFormatException e) {
            // Report the bad argument instead of dying with a stack trace.
            System.out.printf("deviceId must be an integer, got \"%s\"\n", args[2]);
            printUsage();
            return;
         }
      }

      final String flowTypeStr = (args.length > 3) ? args[3] : null;

      final int size = 1000 * 1000;

      final float[] a = new float[size];
      final float[] b = new float[size];

      for (int i = 0; i < size; i++) {
         a[i] = (float) (Math.random() * 100);
         b[i] = (float) (Math.random() * 100);
      }

      final float[] sum = new float[size];

      // Each work item adds one pair of elements.
      final Kernel kernel = new Kernel(){
         @Override public void run() {
            int gid = getGlobalId();
            sum[gid] = a[gid] + b[gid];
         }
      };

      try {
         if (flowTypeStr != null) {
            kernel.setFlowType(flowTypeStr);
         }

         System.out.printf("Running kernel..\n");

         // Start the clock only now, so the platform listing and argument parsing
         // above are not counted in the reported time.
         final long startTime = System.nanoTime();

         System.out.printf("**** getDevice ****\n");
         // NOTE(review): what getDevice returns when nothing matches is not visible here —
         // confirm whether a null check is needed before building the range.
         final Device device = Device.getDevice(platformHint, deviceType, deviceId);
         // NOTE(review): presumably 512 is the global size and 16 the local size, in which
         // case only the first 512 of the 'size' elements are computed — confirm against Range.create.
         kernel.execute(Range.create(device, 512, 16));
         System.out.printf("****************\n");

         final long elapsedTimeNano = System.nanoTime() - startTime;
         final long elapsedTimeSec = TimeUnit.SECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);
         final long elapsedTimeMilli = TimeUnit.MILLISECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);

         System.out.printf("****************\n");
         System.out.printf("Elapsed time in milli: %d\n", elapsedTimeMilli);
         System.out.printf("Elapsed time in sec : %d\n", elapsedTimeSec);
         System.out.printf("****************\n");

         // Show only the first few results rather than all 'size' of them.
         final int displayRange = Math.min(size, MAX_DISPLAYED_RESULTS);

         System.out.printf("**************** Showing first %d results ****************\n", displayRange);

         for (int i = 0; i < displayRange; i++) {
            System.out.printf("%6.2f + %6.2f = %8.2f\n", a[i], b[i], sum[i]);
         }
      } finally {
         // Dispose of the kernel even if device selection or execution throws.
         kernel.dispose();
      }
   }

}