diff --git a/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md
@@ -0,0 +1,10 @@
+---------
+README
+---------
+Some scripts locate libaparapi_x86_64.so with the following option => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X]
+where X stands for the platform type name: std, fpga, etc.
+Build the X version of libaparapi_x86_64.so and copy it to this folder so the scripts can pick it up:
+go to: com.amd.aparapi.jni
+run: ant -f build_X_.xml
+copy: dist/libaparapi_x86_64.so to dist.X
+
diff --git a/src/aparapi/com.amd.aparapi.jni/dist.std/README.md b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md
@@ -0,0 +1,10 @@
+---------
+README
+---------
+Some scripts locate libaparapi_x86_64.so with the following option => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X]
+where X stands for the platform type name: std, fpga, etc.
+Build the X version of libaparapi_x86_64.so and copy it to this folder so the scripts can pick it up:
+go to: com.amd.aparapi.jni
+run: ant -f build_X_.xml
+copy: dist/libaparapi_x86_64.so to dist.X
+
diff --git a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so
index b54beb2c36c3d06d6747102c2a59023f077791f6..95d77cecc323704ccb9f71fdb6fdbb71b16fdbfe 100755
Binary files a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so and b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so differ
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h
new file mode 100644
index 0000000000000000000000000000000000000000..30b2ec599b5bfd797767784db5146dc0ede8033e
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h
@@ -0,0 +1,13 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class com_amd_aparapi_Kernel_FlowType */
+
+#ifndef _Included_com_amd_aparapi_Kernel_FlowType
+#define _Included_com_amd_aparapi_Kernel_FlowType
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
index 8de80a9844444d079086e7674c634aa936688d44..5a4f770f9a1bdac9398c4c2a4c40bb0131a2db64 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
@@ -10,7 +10,7 @@ extern "C" {
#undef com_amd_aparapi_Range_THREADS_PER_CORE
#define com_amd_aparapi_Range_THREADS_PER_CORE 16L
#undef com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE
-#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 256L
+#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 1024L
#ifdef __cplusplus
}
#endif
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
index 555f91426e90e4f115ba50a36bdb2132fe2c7532..6791475b00edd9bff51e1b85aab0114f4146b019 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
@@ -51,6 +51,12 @@ extern "C" {
#define com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_STATIC 4194304L
#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU
#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU 4L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW 1L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW 2L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW 4L
/*
* Class: com_amd_aparapi_internal_jni_KernelRunnerJNI
* Method: initJNI
@@ -70,10 +76,10 @@ JNIEXPORT jint JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_getJNI
/*
* Class: com_amd_aparapi_internal_jni_KernelRunnerJNI
* Method: buildProgramJNI
- * Signature: (JLjava/lang/String;)J
+ * Signature: (JLjava/lang/String;I)J
*/
JNIEXPORT jlong JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_buildProgramJNI
- (JNIEnv *, jobject, jlong, jstring);
+ (JNIEnv *, jobject, jlong, jstring, jint);
/*
* Class: com_amd_aparapi_internal_jni_KernelRunnerJNI
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
index b9f11d7f81c45c1c24027e6723d40a0310abae83..2819cc7d35fcfd374c54f71d439e6d1627547b11 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
@@ -51,6 +51,12 @@ extern "C" {
#define com_amd_aparapi_internal_kernel_KernelRunner_ARG_STATIC 4194304L
#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU
#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU 4L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW 1L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW 2L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW 4L
#ifdef __cplusplus
}
#endif
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
index a4a49cf8bad22f0cb71ac0b46932e3934b8f45ac..71f36615cb6ef859811a77248c48cce45fc03b27 100644
--- a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
@@ -336,7 +336,8 @@ JNI_JAVA(void, OpenCLJNI, invoke)
}
JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
- (JNIEnv *jenv, jobject jobj) {
+ (JNIEnv *jenv, jobject jobj)
+ {
jobject platformListInstance = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn);
cl_int status = CL_SUCCESS;
cl_uint platformc;
@@ -352,43 +353,23 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VERSION, sizeof(platformVersionName), platformVersionName, NULL);
fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName);
- // fix this so OpenCL 1.3 or higher will not break!
- if (
-#ifdef ALTERA_OPENCL
- 1 // !!! oren fix for bad platform version check
- ||
-#endif
- !strncmp(platformVersionName, "OpenCL 1.2", 10)
- || !strncmp(platformVersionName, "OpenCL 1.1", 10)
-#ifdef __APPLE__
- || !strncmp(platformVersionName, "OpenCL 1.0", 10)
-#endif
- )
- {
- char platformVendorName[512];
- char platformName[512];
- status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL);
- status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
- fprintf(stderr, "platform vendor %d %s\n", platformIdx, platformVendorName);
- fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName);
- jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ),
+ char platformVendorName[512];
+ char platformName[512];
+ status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL);
+ status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
+ fprintf(stderr, "platform vendor %d %s\n", platformIdx, platformVendorName);
+ fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName);
+ jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ),
(jlong)platformIds[platformIdx],
jenv->NewStringUTF(platformVersionName),
jenv->NewStringUTF(platformVendorName),
jenv->NewStringUTF(platformName)
);
- JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
-
- cl_uint deviceIdc;
- // !!! oren fix - detect accelerators as well, they forgot to add the CL_DEVICE_TYPE_ACCELERATOR
-#ifndef ALTERA_OPENCL
- cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
-#else
- //cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
- // Altera OpenCL fails if this is different then = CL_DEVICE_TYPE_ACCELERATOR
- cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
-#endif
- status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
+ JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
+
+ cl_uint deviceIdc;
+ cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ALL;
+ status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
if (status == CL_SUCCESS && deviceIdc > 0 ){
fprintf(stderr, "found %d devices\n", deviceIdc);
cl_device_id* deviceIds = new cl_device_id[deviceIdc];
@@ -476,7 +457,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
}
}
}
- }
+ //}
return (platformListInstance);
}
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
index 205c425853f584fbe0b29da677a87bd0775b0821..e9e5101e51cf08e3ea5fa73062ee6d2649220d33 100644
--- a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
@@ -52,6 +52,8 @@
#include <algorithm>
//#include <string>
+// !!! oren change ->
+#include "ConfigSettings.h"
//compiler dependant code
/**
@@ -1152,9 +1154,9 @@ inline char* getClassName(JNIEnv* jenv, JNIContext* jniContext, const char *optE
// !!! Java adds '$' chars to inner class names so replace them with '.'
char *charPtr = classNameStr;
- while(charPtr=strchr(charPtr,'$'))
+ while(charPtr = strchr(charPtr,'$'))
{
- *charPtr='.';
+ *charPtr = BINARY_FILE_SEP;
charPtr++;
}
@@ -1285,46 +1287,57 @@ inline void outputOCLFile(JNIEnv* jenv, JNIContext* jniContext, const char *sour
}
+inline void verifyFlow(jint &buildFlags)
+{
+   // resolve DEFAULT_FLOW first: it is only a request marker and is never part of PLATFORM_FLOW_SUPPORT
+   if(buildFlags == DEFAULT_FLOW)
+      buildFlags = PLATFORM_DEFAULT_FLOW;
+
+   // verify the (resolved) flow is supported by this build; buildFlags has been updated in place
+   if(!(PLATFORM_FLOW_SUPPORT & buildFlags))
+   {
+      fprintf(stderr, "!!! Error requested flow(0x%x) not available !!!\n", (unsigned int)buildFlags);
+      throw CLException(CL_INVALID_VALUE,"buildProgramJNI() -> bad request flow");
+   }
+}
JNI_JAVA(jlong, KernelRunnerJNI, buildProgramJNI)
- (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) {
+ (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source, jint buildFlags) {
JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
if (jniContext == NULL){
return 0;
}
try {
- cl_int status = CL_SUCCESS;
-
-#ifdef ALTERA_OPENCL
-#define OUTPUT_OCL_FILE
-#define USE_BINARY_FILE
-#define BINARY_FILE_EXT ".aocx"
-#endif
-
-// allows defining an alternative folder where bin files should be loaded from
-// Usefull when running in aparapi embeded mode
-#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER"
+ cl_int status = CL_SUCCESS;
const char *sourceChars = jenv->GetStringUTFChars(source, NULL);
-//#ifdef OUTPUT_OCL_FILE
- outputOCLFile(jenv,jniContext,sourceChars);
-//#endif
-
-#ifdef USE_BINARY_FILE
- char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR);
- fprintf(stderr, "Bin Folder is %s\n",binFileFolder);
- char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT);
- char *fullBinFilePath = buildFilePath(binFileFolder,binFileName);
- fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath);
- jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context, 1, &jniContext->deviceId, fullBinFilePath, NULL, &status);
- delete []binFileName;
- delete []fullBinFilePath;
-#else
- jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context, 1, &jniContext->deviceId, sourceChars, NULL, &status);
+#ifdef OUTPUT_OCL_FILE
+ outputOCLFile(jenv,jniContext,sourceChars);
#endif
+ // !!! oren change ->
+ // verify the flow and modify if need be
+ verifyFlow(buildFlags);
+
+//#ifdef USE_BINARY_FILE
+ if(buildFlags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW)
+ {
+ char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR);
+ fprintf(stderr, "Bin Folder is %s\n",binFileFolder);
+ char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT);
+ char *fullBinFilePath = buildFilePath(binFileFolder,binFileName);
+ fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath);
+ jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context, 1, &jniContext->deviceId, fullBinFilePath, NULL, &status);
+ delete []binFileName;
+ delete []fullBinFilePath;
+ }
+//#else
+ else
+ jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context, 1, &jniContext->deviceId, sourceChars, NULL, &status);
+//#endif
+
jenv->ReleaseStringUTFChars(source, sourceChars);
if(status == CL_BUILD_PROGRAM_FAILURE) throw CLException(status, "");
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb7063b19a26264aeb36dbd44502477a95fbcc8c
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h
@@ -0,0 +1,58 @@
+#ifndef CONFIG_SETTINGS_H
+#define CONFIG_SETTINGS_H
+
+// !!! oren changes ->
+// configuration settings for building platform specific code
+// TODO: consider moving parts of this to a configuration file later on and load settings dynamically
+
+// use values from JNI config
+#include "com_amd_aparapi_internal_jni_KernelRunnerJNI.h"
+
+
+// auto output kernel.cl file
+#define OUTPUT_OCL_FILE
+// allows defining an alternative folder where bin files should be loaded from
+// Useful when running in Aparapi embedded mode
+#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER"
+
+///////////////////////////
+// help determine if platform supports source/binary flows
+///////////////////////////
+#define SOURCE_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW
+#define BINARY_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW
+#define DEFAULT_FLOW com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW
+///////////////////////////
+
+
+///////////////////////////
+// define platform settings
+//////////////////////////
+// BINARY_FILE_EXT => define binary file extension
+// BINARY_FILE_SEP => define binary file separator, replaces java's $ signs in file names -> examples: .,_ etc.
+///////////////////////////
+// Altera platform specific
+///////////////////////////
+#ifdef ALTERA_OPENCL
+   #define PLATFORM_FLOW_SUPPORT BINARY_FLOW
+   #define PLATFORM_DEFAULT_FLOW BINARY_FLOW
+   #define BINARY_FILE_EXT ".aocx"
+   #define BINARY_FILE_SEP '.'
+#else // default settings
+///////////////////////////
+// All other platforms (including AMD_OPENCL, INTEL_OPENCL and NVIDIA_OPENCL builds)
+//
+// No vendor other than Altera has specific settings yet, so they all share these
+// defaults. An empty "#elif <VENDOR>_OPENCL" branch would leave the four macros
+// below undefined for that vendor and break the build of Aparapi.cpp.
+// To add a vendor: insert "#elif defined(<VENDOR>_OPENCL)" above this #else and
+// define all four macros in it.
+///////////////////////////
+   #define PLATFORM_FLOW_SUPPORT (BINARY_FLOW | SOURCE_FLOW)
+   #define PLATFORM_DEFAULT_FLOW SOURCE_FLOW
+   #define BINARY_FILE_EXT ".bcl"
+   #define BINARY_FILE_SEP '.'
+#endif // ALTERA_OPENCL
+
+#endif // CONFIG_SETTINGS_H
+
+
diff --git a/src/aparapi/com.amd.aparapi/dist/aparapi.jar b/src/aparapi/com.amd.aparapi/dist/aparapi.jar
index 7e1bc57f0d7e26156e6950c36dbb6b8f273faf72..0be74ef231d313a2b694f0ca651bdb8c5caf6e2b 100644
Binary files a/src/aparapi/com.amd.aparapi/dist/aparapi.jar and b/src/aparapi/com.amd.aparapi/dist/aparapi.jar differ
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index da578847e0cda8341403d573bd67ab2401e5dd18..004ee360bc5b44717009635a969994fdfd054722 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -93,7 +93,16 @@ public class Config extends ConfigJNI{
*
*/
public static final String platformHint = System.getProperty(propPkgName + ".platformHint");
-
+
+ //!!! oren change 7.15.15 -> allow choosing a flow type
+ /**
+ * Allows the user to select a flow type
+ *
+ * Usage -Dcom.amd.aparapi.flowType={binary|source|default}
+ *
+ */
+ public static final String flowType = System.getProperty(propPkgName + ".flowType");
+
/**
* Allows the user to request that the execution mode of each kernel invocation be reported to stdout.
*
@@ -204,6 +213,8 @@ public class Config extends ConfigJNI{
System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV);
// !!! oren change
System.out.println(propPkgName + ".profilingFileNameFormatStr{format str}=" + profilingFileNameFormatStr);
+ System.out.println(propPkgName + ".flowType{source|binary|default}=" + flowType);
+ //////////////////
System.out.println(propPkgName + ".enableVerboseJNI{true|false}=" + enableVerboseJNI);
System.out.println(propPkgName + ".enableVerboseJNIOpenCLResourceTracking{true|false}="
+ enableVerboseJNIOpenCLResourceTracking);
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 900b344042d8e9ccf6209f0aa2da91e9142031fc..2d11bcce22f037ca2f3653ff72d0588de4303590 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -405,6 +405,80 @@ public abstract class Kernel implements Cloneable {
}
};
+   ////////////////////
+   // !!! oren change -> add source/binary flow support to kernel
+   ////////////////////
+   /** Kernel build flow; each constant carries the matching KernelRunnerJNI.JNI_FLAG_*_FLOW value. */
+   public static enum FlowType
+   {
+      SOURCE(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_SOURCE_FLOW),
+      BINARY(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_BINARY_FLOW),
+      DEFAULT(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_DEFAULT_FLOW);
+
+      final int flowType; // JNI flag value of this constant; fixed at construction
+
+      FlowType(int flowType)
+      {
+         this.flowType = flowType;
+      }
+
+      /** @return the JNI_FLAG_*_FLOW value of this flow type */
+      public int getValue()
+      {
+         return this.flowType;
+      }
+
+      /** Flow for kernels that never call setFlowType(): Config.flowType when set and valid,
+       *  else DEFAULT (a bad property value must not make every Kernel construction fail). */
+      public static FlowType getDefaultFlowType()
+      {
+         if (Config.flowType == null) {
+            return DEFAULT;
+         }
+         try
+         {
+            return strToFlowType(Config.flowType);
+         }
+         catch (IllegalArgumentException e)
+         {
+            logger.info("!!! reverting to default platform flow!");
+            return DEFAULT;
+         }
+      }
+
+      /** Converts a flow name (case-insensitive); logs and rethrows if it is null or unknown. */
+      public static FlowType strToFlowType(final String flowTypeStr)
+      {
+         try
+         {
+            // Locale.ROOT: a default locale such as Turkish upper-cases the 'i' of "binary" to a dotted capital I
+            return valueOf(flowTypeStr.toUpperCase(java.util.Locale.ROOT));
+         }
+         catch (RuntimeException e)
+         {
+            logger.info("!!! bad flow type => (" + flowTypeStr + ")");
+            throw e;
+         }
+      }
+   }
+
+   /** @return the flow type handed to the native program build for this kernel */
+   public FlowType getFlowType() {
+      return kernelFlowType;
+   }
+
+   public void setFlowType(FlowType kernelFlowType) {
+      this.kernelFlowType = kernelFlowType;
+   }
+
+   public void setFlowType(String flowTypeStr) {
+      this.kernelFlowType = FlowType.strToFlowType(flowTypeStr);
+   }
+
+   private FlowType kernelFlowType = FlowType.getDefaultFlowType();
+
+   ////////////////////
+
private KernelRunner kernelRunner = null;
private KernelState kernelState = new KernelState();
@@ -616,7 +690,8 @@ public abstract class Kernel implements Cloneable {
return getGlobalId(0);
}
- @OpenCLDelegate
+
+@OpenCLDelegate
protected final int getGlobalId(int _dim) {
return kernelState.getGlobalIds()[_dim];
}
@@ -2816,4 +2891,5 @@ public abstract class Kernel implements Cloneable {
executionMode = currentMode.next();
}
}
+
}
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
index 06fd2c6377c2cc959587e573cdc32bc5048d31be..34e3a4b5ac1e6a33db58640df381c7ee2daa501f 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
@@ -51,7 +51,11 @@ public class Range extends RangeJNI{
public static final int THREADS_PER_CORE = 16;
- public static final int MAX_OPENCL_GROUP_SIZE = 256;
+   // !!! oren change -> this value looks outdated and the mechanism probably needs revisiting !!!
+   // we already see evidence of improved performance for size==1024 on certain devices (ref: FPGA doc classification paper)
+   // for now we set it to 4X the original value, but we should think about it more...
+ //public static final int MAX_OPENCL_GROUP_SIZE = 256;
+ public static final int MAX_OPENCL_GROUP_SIZE = 1024;
public static final int MAX_GROUP_SIZE = Math.max(Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE,
MAX_OPENCL_GROUP_SIZE);
@@ -113,11 +117,18 @@ public class Range extends RangeJNI{
*/
private static int[] getFactors(int _value, int _max) {
- final int factors[] = new int[MAX_GROUP_SIZE];
+ //final int factors[] = new int[MAX_GROUP_SIZE];
int factorIdx = 0;
- for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) {
- if ((_value % possibleFactor) == 0) {
+      // !!! oren bug fix -> based on poz findings
+      // max cannot be bigger than value, and if factorIdx >= MAX_GROUP_SIZE we would have an access violation
+ final int GroupSizeLimit = Math.min(Math.min(_max,_value),MAX_GROUP_SIZE);
+ final int factors[] = new int[GroupSizeLimit];
+ //for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++)
+ for (int possibleFactor = 1; possibleFactor <= GroupSizeLimit; possibleFactor++)
+ {
+ if ((_value % possibleFactor) == 0)
+ {
factors[factorIdx++] = possibleFactor;
}
}
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
index 7fa7ec4c59e3ce4d44070e2aca4cfdb40ccf0bd3..f1c2f68e09c0d578956ed0b43c74b6360a368ea8 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
@@ -1,8 +1,11 @@
package com.amd.aparapi.device;
+import java.util.List;
+
import com.amd.aparapi.Range;
import com.amd.aparapi.device.OpenCLDevice.DeviceComparitor;
import com.amd.aparapi.device.OpenCLDevice.DeviceSelector;
+import com.amd.aparapi.internal.opencl.OpenCLPlatform;
public abstract class Device{
@@ -14,6 +17,98 @@ public abstract class Device{
JTP,
SEQ
};
+
+   // !!! oren change -> get device using the tuple (platform, deviceType, id)
+
+   /** Same as getDevice(String, String, int), with the type given as a Device.TYPE constant. */
+   public static Device getDevice(String platformName, Device.TYPE deviceType, int deviceId)
+   {
+      return getDevice(platformName,deviceType.name(),deviceId);
+   }
+
+   /** Returns the first available device of the given type on the matching platform. */
+   public static Device getDevice(String platformName, Device.TYPE deviceType)
+   {
+      return getDevice(platformName,deviceType.name(),0);
+   }
+
+   /** Returns the first available device of the named type on the matching platform. */
+   public static Device getDevice(String platformName, String deviceTypeName)
+   {
+      return getDevice(platformName,deviceTypeName,0);
+   }
+
+   /**
+    * Looks up an OpenCL device by (platform, device type, index), printing what it inspects to stdout.
+    *
+    * @param platformName   substring that the platform name must contain
+    * @param deviceTypeName device type name, compared ignoring case (e.g. the name of a Device.TYPE constant)
+    * @param deviceId       zero-based index among the platform's devices of that type;
+    *                       0 or a negative value selects the first one
+    * @return the selected device, or null when no platform/device matches
+    */
+   public static Device getDevice(String platformName, String deviceTypeName, int deviceId)
+   {
+      List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms();
+
+      int platformc = 0;
+      for (OpenCLPlatform platform : platforms)
+      {
+         if(platform.getName().contains(platformName))
+         {
+            System.out.println("Platform " + platformc + "{");
+
+            System.out.println(" Name : \"" + platform.getName() + "\"");
+            System.out.println(" Vendor : \"" + platform.getVendor() + "\"");
+            System.out.println(" Version : \"" + platform.getVersion() + "\"");
+
+            List<OpenCLDevice> devices = platform.getOpenCLDevices();
+            System.out.println(" Platform contains " + devices.size() + " OpenCL devices");
+
+            // devicec indexes only the devices whose type matches, so deviceId=n means
+            // "the n-th device of the requested type" on this platform
+            int devicec = 0;
+
+            for (OpenCLDevice device : devices)
+            {
+               if(!device.getType().name().equalsIgnoreCase(deviceTypeName))
+               {
+                  continue;
+               }
+
+               System.out.println(" Device " + devicec + "{");
+               System.out.println(" Type : " + device.getType());
+               System.out.println(" GlobalMemSize : " + device.getGlobalMemSize());
+               System.out.println(" LocalMemSize : " + device.getLocalMemSize());
+               System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits());
+               System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize());
+               System.out.println(" MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions());
+               System.out.println(" }");
+
+               if(deviceId<=0 || devicec==deviceId)
+               {
+                  // close platform bracket
+                  System.out.println("}");
+                  return device;
+               }
+
+               // the index must advance before moving on, otherwise deviceId>0 can never match
+               System.out.println("!!! devicec!=deviceId(" + deviceId + ") => continue search !!!");
+               devicec++;
+            }
+
+            System.out.println("Device type/id combination not found");
+
+            System.out.println("}");
+
+            platformc++;
+         }
+
+      }
+      // return not found !!!
+      return null;
+   }
+
public static Device best() {
return (OpenCLDevice.select(new DeviceComparitor(){
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index a672093fe97114a2e1fe4d5ed1fc63c73bb7e5bf..da60ff634120a890eeeebf3a6315e69de3c7e395 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -276,6 +276,19 @@ public abstract class KernelRunnerJNI{
*/
// @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
+ /** !!! oren change ->
+ * These flags indicate that we want to build source/binary i.e. use source/binary flow.
+ *
+ * Be careful changing final constants starting with JNI.<br/>
+ *
+ * @see com.amd.aparapi.internal.annotation.UsedByJNICode
+ *
+ * @author oren
+ */
+ @UsedByJNICode public static final int JNI_FLAG_SOURCE_FLOW = 1 << 0;
+ @UsedByJNICode public static final int JNI_FLAG_BINARY_FLOW = 1 << 1;
+ @UsedByJNICode public static final int JNI_FLAG_DEFAULT_FLOW = 1 << 2;
+
/*
* Native methods
*/
@@ -295,7 +308,9 @@ public abstract class KernelRunnerJNI{
protected native int getJNI(long _jniContextHandle, Object _array);
- protected native long buildProgramJNI(long _jniContextHandle, String _source);
+ //protected native long buildProgramJNI(long _jniContextHandle, String _source);
+ // !!! oren change -> add binary option to build
+ protected native long buildProgramJNI(long _jniContextHandle, String _source, int _buildFlags);
protected native int setArgsJNI(long _jniContextHandle, KernelArgJNI[] _args, int argc);
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index af7b5afa9332c896407951c1259a02deeefc4b59..c8578a9bc6ccaac8631a2e36cb2fbec19826b29c 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -1047,8 +1047,11 @@ public class KernelRunner extends KernelRunnerJNI{
e1.printStackTrace();
}
*/
+ // !!! oren change -> support flow types
+ // set flow type
+ int buildFlags = kernel.getFlowType().getValue();
// Send the string to OpenCL to compile it
- if (buildProgramJNI(jniContextHandle, openCL) == 0) {
+ if (buildProgramJNI(jniContextHandle, openCL,buildFlags) == 0) {
return warnFallBackAndExecute(_entrypointName, _range, _passes, "OpenCL compile failed");
}
diff --git a/src/aparapi/samples/add/add.jar b/src/aparapi/samples/add/add.jar
index bfdfc338d748f2d47dce6d35f6641722bb0f35c4..d3b27be0a3a87fa33d23810463f0b115fb033af0 100644
Binary files a/src/aparapi/samples/add/add.jar and b/src/aparapi/samples/add/add.jar differ
diff --git a/src/aparapi/samples/add/selectPlatform.sh b/src/aparapi/samples/add/selectPlatform.sh
new file mode 100644
index 0000000000000000000000000000000000000000..386ea6e808ac110d0d514f39957587451f59291d
--- /dev/null
+++ b/src/aparapi/samples/add/selectPlatform.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+#
+# select platform and flow test script.
+# runs the add kernel on different platforms/flows.
+#
+# $1 = platformHint [AMD,Altera,Intel,NVidia]
+# $2 = deviceType [CPU,GPU,ACC]
+# $3 = deviceId [0=first available device..n=available device]
+# $4 = flowTypeStr [source,binary,default]
+# can use -Dcom.amd.aparapi.flowType instead
+# $5 = dist.[std,fpga] to load -> !!! note -> remember to build and copy libaparapi_x86_64.so to the correct place
+# example: cp ../../com.amd.aparapi.jni/dist/libaparapi_x86_64.so ../../com.amd.aparapi.jni/dist.std/
+# Alternatively use a script version below without $5 ->
+# -Djava.library.path=../../com.amd.aparapi.jni/dist \
+# arguments are quoted so values containing blanks or glob characters are passed through unchanged
+# usage examples:
+# sh selectPlatform.sh AMD CPU 0 source std
+# sh selectPlatform.sh Altera ACC 0 binary fpga
+# ${4+"$4"} passes the flow type only when it was given (no empty argument otherwise)
+java \
+ -Djava.library.path="../../com.amd.aparapi.jni/dist.$5" \
+ -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+ com.amd.aparapi.sample.add.MainSelectPlatform "$1" "$2" "$3" ${4+"$4"}
+
+#
+# other script variants uncomment to use ->
+#
+
+# script version with libaparapi_x86_64.so in dist
+#java \
+# -Djava.library.path=../../com.amd.aparapi.jni/dist \
+# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+# com.amd.aparapi.sample.add.MainSelectPlatform "$1" "$2" "$3"
+
+# script version with flow type set through config
+#java \
+# -Djava.library.path="../../com.amd.aparapi.jni/dist.$5" \
+# -Dcom.amd.aparapi.flowType="$4" \
+# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+# com.amd.aparapi.sample.add.MainSelectPlatform "$1" "$2" "$3"
+
+
diff --git a/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java
new file mode 100644
index 0000000000000000000000000000000000000000..053483ef860404f82a66a9874f93dd513f6123d7
--- /dev/null
+++ b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java
@@ -0,0 +1,277 @@
+/*
+
+Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+
+All rights reserved.
+
+
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+
+following conditions are met:
+
+
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+
+disclaimer.
+
+
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+
+disclaimer in the documentation and/or other materials provided with the distribution.
+
+
+
+Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+
+derived from this software without specific prior written permission.
+
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+
+laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through
+
+774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR,
+
+you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of
+
+Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration
+
+Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,
+
+E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups
+
+D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject
+
+to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774
+
+of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations
+
+under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/.
+
+
+
+*/
+
+
+
+package com.amd.aparapi.sample.add;
+
+
+
+import java.io.IOException;
+
+import java.util.concurrent.TimeUnit;
+
+
+
+import com.amd.aparapi.Kernel;
+
+import com.amd.aparapi.Range;
+
+
+import com.amd.aparapi.device.Device;
+
+import com.amd.aparapi.device.OpenCLDevice;
+
+import com.amd.aparapi.internal.opencl.OpenCLPlatform;
+
+
+import java.util.List;
+
+
+
+public class MainSelectPlatform {
+
+ public static void listPlatformsAndDevices()
+ {
+ List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms();
+
+ int platformc = 0;
+ for (OpenCLPlatform platform : platforms)
+ {
+
+ System.out.println("Platform " + platformc + "{");
+
+ System.out.println(" Name : \"" + platform.getName() + "\"");
+
+ System.out.println(" Vendor : \"" + platform.getVendor() + "\"");
+
+ System.out.println(" Version : \"" + platform.getVersion() + "\"");
+
+ List<OpenCLDevice> devices = platform.getOpenCLDevices();
+
+ System.out.println(" Platform contains " + devices.size() + " OpenCL devices");
+
+ int devicec = 0;
+
+ for (OpenCLDevice device : devices)
+ {
+ System.out.println(" Device " + devicec + "{");
+
+ System.out.println(" Type : " + device.getType());
+
+ System.out.println(" GlobalMemSize : " + device.getGlobalMemSize());
+
+ System.out.println(" LocalMemSize : " + device.getLocalMemSize());
+
+ System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits());
+
+ System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize());
+
+ System.out.println(" MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions());
+
+ System.out.println(" }");
+
+ devicec++;
+ }
+
+ // close platform bracket
+ System.out.println("}");
+
+ platformc++;
+ }
+ }
+
+ public static void main(String[] args) {
+
+
+
+ final int size = 1000*1000;
+
+
+
+ final float[] a = new float[size];
+
+ final float[] b = new float[size];
+
+
+
+ for (int i = 0; i < size; i++) {
+
+ a[i] = (float) (Math.random() * 100);
+
+ b[i] = (float) (Math.random() * 100);
+
+ }
+
+
+
+ final float[] sum = new float[size];
+
+
+
+ Kernel kernel = new Kernel(){
+
+ @Override public void run() {
+
+ int gid = getGlobalId();
+
+ sum[gid] = a[gid] + b[gid];
+
+ }
+
+ };
+
+
+
+
+
+ // !!! oren -> add time measurement
+
+ System.out.printf("Running kernel..");
+
+
+
+ long startTime = System.nanoTime();
+
+
+ // !!! experiment with platform/device selection
+ System.out.printf("**** listPlatformsAndDevices ****\n");
+ listPlatformsAndDevices();
+ System.out.printf("****************\n");
+ if(args.length<2)
+ {
+ System.out.printf("****************\n");
+ System.out.printf("Usage is: select platformHint deviceType\n");
+ System.out.printf("****************\n");
+ return;
+ }
+
+ String platformHint = args[0];
+ String deviceType = args[1];
+ int deviceId = (args.length>2) ? Integer.parseInt(args[2]) : 0;
+ String flowTypeStr = (args.length>3) ? args[3] : null;
+ if(flowTypeStr!=null)
+ kernel.setFlowType(flowTypeStr);
+ System.out.printf("**** getDevice ****\n");
+ Device device = Device.getDevice(platformHint,deviceType,deviceId);
+ kernel.execute(Range.create(device,512,16));
+ System.out.printf("****************\n");
+
+
+
+ long elapsedTimeNano = System.nanoTime() - startTime;
+
+
+
+ long elapsedTimeSec = TimeUnit.SECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);
+
+
+
+ long elapsedTimeMilli = TimeUnit.MILLISECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);
+
+
+
+ System.out.printf("****************\n");
+
+ System.out.printf("Elapsed time in milli: %d\n",elapsedTimeMilli);
+
+ System.out.printf("Elapsed time in sec : %d\n",elapsedTimeSec);
+
+ System.out.printf("****************\n");
+
+
+
+ // !!! oren change -> show first 10 only
+
+ //for (int i = 0; i < size; i++) {
+
+ int displayRange = (size > 20) ? 20 : size;
+
+ System.out.printf("**************** Showing first %d results ****************\n",displayRange);
+
+ for (int i = 0; i < displayRange; i++) {
+
+ System.out.printf("%6.2f + %6.2f = %8.2f\n", a[i], b[i], sum[i]);
+
+ }
+
+
+
+ kernel.dispose();
+
+ }
+
+
+
+}