diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml
index 0ea10815afe0cd76afee85172bc42436ad1b4966..035bc928bbeb0a01880f7613fa7cc47b23c6534c 100644
--- a/com.amd.aparapi.jni/build.xml
+++ b/com.amd.aparapi.jni/build.xml
@@ -73,6 +73,55 @@ First consider editing the properties in build.properties
<echo message="amd.app.sdk.dir ${amd.app.sdk.dir}"/>
+ <available property="linux.intel.app.sdk.exists" file="/opt/intel/opencl" type="dir"/> <!-- probe for an Intel OpenCL SDK directory at /opt/intel/opencl -->
+ <condition property="intel.app.sdk.dir" value="/opt/intel/opencl"> <!-- set only on non-Mac Unix, when the probe succeeded and neither Windows AMD SDK probe property is set -->
+ <and>
+ <os family="unix" />
+ <not>
+ <os family="mac" />
+ </not>
+ <isset property="linux.intel.app.sdk.exists" />
+ <not>
+ <isset property="win32.amd.app.sdk.exists" />
+ </not>
+ <not>
+ <isset property="win64.amd.app.sdk.exists" />
+ </not>
+ </and>
+ </condition>
+
+ <echo message=" intel.app.sdk.dir ${intel.app.sdk.dir}"/>
+
+ <condition property="vendor.name" value="amd"> <!-- amd is chosen whenever amd.app.sdk.dir is set -->
+ <isset property="amd.app.sdk.dir" />
+ </condition>
+
+ <condition property="vendor.name" value="intel"> <!-- intel is chosen only when intel.app.sdk.dir is set and amd.app.sdk.dir is not -->
+ <and>
+ <isset property="intel.app.sdk.dir" />
+ <not>
+ <isset property="amd.app.sdk.dir" />
+ </not>
+ </and>
+ </condition>
+
+ <echo message=" vendor.name ${vendor.name}"/>
+
+ <condition property="app.sdk.dir" value="${amd.app.sdk.dir}"> <!-- SDK root used for the include and library paths of the compile targets; the AMD directory is tried first -->
+ <isset property="amd.app.sdk.dir" />
+ </condition>
+
+ <condition property="app.sdk.dir" value="${intel.app.sdk.dir}"> <!-- otherwise use the Intel directory, but only while app.sdk.dir is still unset -->
+ <and>
+ <isset property="intel.app.sdk.dir" />
+ <not>
+ <isset property="app.sdk.dir" />
+ </not>
+ </and>
+ </condition>
+
+ <echo message="app.sdk.dir ${app.sdk.dir}"/>
+
<!-- Check for Visual Studio Compiler -->
<!-- This needs to be in descending order to properly handle multiple installations -->
<available property="msvc.32.12.0.exists" file="C:/Program Files/Microsoft Visual Studio 12.0\VC\bin" type="dir"/>
@@ -321,6 +370,12 @@ First consider editing the properties in build.properties
<condition property="optional.x64.subdir" value="" else="x64\">
<equals arg1="${x86_or_x86_64}" arg2="x86"/>
</condition>
+
+
+ <!-- Library sub-directory of the SDK (lib for x86, lib64 otherwise). No trailing separator: the value is appended to both gcc -L and MSVC /libpath: arguments, and a trailing backslash is a literal character on Unix, not a path separator. -->
+ <condition property="optional.app.sdk.lib.subdir" value="lib" else="lib64">
+ <equals arg1="${x86_or_x86_64}" arg2="x86"/>
+ </condition>
<condition property="gcc.m.value" value="32" else="64">
<equals arg1="${x86_or_x86_64}" arg2="x86"/>
@@ -352,18 +407,18 @@ First consider editing the properties in build.properties
<os family="mac" />
</not>
<not>
- <isset property="amd.app.sdk.dir" />
+ <isset property="app.sdk.dir" />
</not>
</and>
</condition>
<![CDATA[
You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code
- You need to set amd.app.sdk.dir to point to the location where AMD APP SDK is installed
+ You need to set app.sdk.dir to point to the location where OpenCL SDK is installed
]]>
</fail>
- <available file="${amd.app.sdk.dir}" type="dir" property="amd.app.sdk.dir.exists" />
+ <available file="${app.sdk.dir}" type="dir" property="app.sdk.dir.exists" />
<fail message="Error:">
<condition>
@@ -372,14 +427,14 @@ First consider editing the properties in build.properties
<os family="mac" />
</not>
<not>
- <isset property="amd.app.sdk.dir.exists" />
+ <isset property="app.sdk.dir.exists" />
</not>
</and>
</condition>
<![CDATA[
You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code
- At present amd.app.sdk.dir is set (to ${amd.app.sdk.dir}) but that dir does not exist
+ At present app.sdk.dir is set (to ${app.sdk.dir}) but that dir does not exist
]]>
</fail>
</target>
@@ -458,7 +513,7 @@ First consider editing the properties in build.properties
<arg value="-I${java.home}/../include" />
<arg value="-I${java.home}/../include/linux" />
<arg value="-Iinclude" />
- <arg value="-I${amd.app.sdk.dir}/include" />
+ <arg value="-I${app.sdk.dir}/include" />
<arg value="-Isrc/cpp" />
<arg value="-Isrc/cpp/runKernel" />
<arg value="-Isrc/cpp/invoke" />
@@ -480,7 +535,8 @@ First consider editing the properties in build.properties
<arg value="src/cpp/classtools.cpp" />
<arg value="src/cpp/JNIHelper.cpp" />
<arg value="src/cpp/agent.cpp" />
- <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" />
+ <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" />
+ <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" />
<arg value="-lOpenCL" />
</exec>
</target>
@@ -543,7 +599,7 @@ First consider editing the properties in build.properties
<arg value="/I${java.home}\..\include" />
<arg value="/I${java.home}\..\include\win32" />
<arg value="/Iinclude" />
- <arg value="/I${amd.app.sdk.dir}\include" />
+ <arg value="/I${app.sdk.dir}\include" />
<arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\shared" />
<arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\um" />
<arg value="/Isrc/cpp" />
@@ -568,7 +624,8 @@ First consider editing the properties in build.properties
<arg value="/link" />
<arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" />
<arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" />
- <arg value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" />
+ <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" />
+ <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" />
<arg value="/libpath:C:\Program Files (x86)\Windows Kits\8.0\Lib\win8\um\x64" />
<arg value="OpenCL.lib" />
<arg value="/out:${basedir}\dist\aparapi_${x86_or_x86_64}.dll" />
@@ -588,7 +645,7 @@ First consider editing the properties in build.properties
<arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/>
<arg value="/I${msvc.dir}\vc\include" />
<arg value="/I${msvc.sdk.dir}\include" />
- <arg value="/I${amd.app.sdk.dir}\include" />
+ <arg value="/I${app.sdk.dir}\include" />
<arg value="/Isrc/cpp" />
<arg value="/Isrc/cpp/runKernel" />
<arg value="/Isrc/cpp/invoke" />
@@ -596,7 +653,8 @@ First consider editing the properties in build.properties
<arg value="/link" />
<arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" />
<arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" />
- <arg value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" />
+ <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" />
+ <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" />
<arg value="OpenCL.lib" />
<arg value="/out:${basedir}/dist/cltest_${x86_or_x86_64}.exe" />
</exec>
@@ -637,9 +695,10 @@ First consider editing the properties in build.properties
<arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/>
<arg value="-I${java.home}/../include" />
<arg value="-I${java.home}/../include/linux" />
- <arg value="-I${amd.app.sdk.dir}/include" />
+ <arg value="-I${app.sdk.dir}/include" />
<arg value="src/cpp/cltest.cpp" />
- <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" />
+ <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" />
+ <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" />
<arg value="-lOpenCL" />
<arg value="-o" />
<arg value="${basedir}/cltest_${x86_or_x86_64}" />
diff --git a/com.amd.aparapi.jni/src/cpp/cltest.cpp b/com.amd.aparapi.jni/src/cpp/cltest.cpp
index 9604dbeb9e44ea0677b44887185810f4193b04a4..9e7c90b8ef7cc9346ca16049a683c7bcd8f6a04c 100644
--- a/com.amd.aparapi.jni/src/cpp/cltest.cpp
+++ b/com.amd.aparapi.jni/src/cpp/cltest.cpp
@@ -126,7 +126,7 @@ int main(int argc, char **argv){
fprintf(stderr, " CL_PLATFORM_VERSION.\"%s\"\n", platformVersionName);
fprintf(stderr, " CL_PLATFORM_NAME....\"%s\"\n", platformName);
cl_uint deviceIdc;
- cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
+ cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
fprintf(stderr, " Platform %d has %d device%s{\n", platformIdx, deviceIdc, ((deviceIdc==1)?"":"s"));
if (status == CL_SUCCESS && deviceIdc >0 ){
diff --git a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
index f35bcd12bbaf4a23f4f5cc015e36c945a715c342..ccfa62bfbae9254f8821dac1fa436380efeb6695 100644
--- a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
+++ b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
@@ -461,7 +461,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
cl_uint deviceIdc;
- cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
+ cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
if (status == CL_SUCCESS && deviceIdc > 0 ){
cl_device_id* deviceIds = new cl_device_id[deviceIdc];
@@ -489,7 +489,8 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
}
if (deviceType & CL_DEVICE_TYPE_ACCELERATOR) {
deviceType &= ~CL_DEVICE_TYPE_ACCELERATOR;
- fprintf(stderr, "Accelerator ");
+ //fprintf(stderr, "Accelerator ");
+ deviceTypeEnumInstance = JNIHelper::getStaticFieldObject(jenv, DeviceTypeClass, "ACC", DeviceTypeClassArg);
}
//fprintf(stderr, "(0x%llx) ", deviceType);
//fprintf(stderr, "\n");
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
index db9a5cfa498bf8a51b54915f52bf8ad7a310cc35..1b13151963aa66872aaefb2e630b808a43837129 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
@@ -13,6 +13,8 @@ JNIContext::JNIContext(JNIEnv *jenv, jobject _kernelObject, jobject _openCLDevic
deviceType(((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)?CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU),
profileFile(NULL),
valid(JNI_FALSE){
+ if (flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)
+ deviceType = CL_DEVICE_TYPE_ACCELERATOR;
cl_int status = CL_SUCCESS;
jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject);
cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
index e22c5ff418b446c83ab6730b089240803e2d1370..aebad48a54ef7767be8694fca9165a4c03b47cdd 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
@@ -51,6 +51,12 @@ public:
return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE);
}
+ jboolean isUsingACC(){
+ // True when the JNI_FLAG_USE_ACC bit(s) are set in flags. The result is normalised to JNI_TRUE/JNI_FALSE, unlike the bare form:
+ //return flags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC;
+ return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC?JNI_TRUE:JNI_FALSE);
+ }
+
~JNIContext(){
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index 8f9df4d32b367539422d6c4e586c7f4edb256046..339ee89e9e482130aa26c93cd1904f72d3026460 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -78,7 +78,7 @@ public class Config extends ConfigJNI{
/**
* Allows the user to request a specific Kernel.EXECUTION_MODE enum value for all Kernels.
*
- * Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU}
+ * Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU|ACC}
*
* @see com.amd.aparapi.Kernel.EXECUTION_MODE
*/
@@ -188,7 +188,7 @@ public class Config extends ConfigJNI{
}
if (dumpFlags) {
- System.out.println(propPkgName + ".executionMode{GPU|CPU|JTP|SEQ}=" + executionMode);
+ System.out.println(propPkgName + ".executionMode{GPU|ACC|CPU|JTP|SEQ}=" + executionMode);
System.out.println(propPkgName + ".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}=" + logger.getLevel());
System.out.println(propPkgName + ".enableProfiling{true|false}=" + enableProfiling);
System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV);
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 93c4cc4324b30e39a22c7c8e1e6b9bf857a4326b..f09dfb892d6a50ef594b53235dbbc1e21d493f5a 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -306,11 +306,12 @@ public abstract class Kernel implements Cloneable {
* determine how it executed.
*
* <p>
- * Aparapi supports 4 execution modes.
+ * Aparapi supports 5 execution modes. The default is GPU when OpenCL is available, otherwise JTP.
* <ul>
* <table>
* <tr><th align="left">Enum value</th><th align="left">Execution</th></tr>
* <tr><td><code><b>GPU</b></code></td><td>Execute using OpenCL on first available GPU device</td></tr>
+ * <tr><td><code><b>ACC</b></code></td><td>Execute using OpenCL on first available Accelerator device</td></tr>
* <tr><td><code><b>CPU</b></code></td><td>Execute using OpenCL on first available CPU device</td></tr>
* <tr><td><code><b>JTP</b></code></td><td>Execute using a Java Thread Pool (one thread spawned per available core)</td></tr>
* <tr><td><code><b>SEQ</b></code></td><td>Execute using a single loop. This is useful for debugging but will be less
@@ -329,7 +330,7 @@ public abstract class Kernel implements Cloneable {
* kernel.execute(values.length);
* </pre></blockquote>
* <p>
- * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,CPU,SEQ</code>
+ * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,ACC,CPU,SEQ</code>
* when an application is launched.
* <p><blockquote><pre>
* java -classpath ....;aparapi.jar -Dcom.amd.aparapi.executionMode=GPU MyApplication
@@ -369,7 +370,11 @@ public abstract class Kernel implements Cloneable {
* <p>
* This is meant to be used for debugging a kernel.
*/
- SEQ;
+ SEQ,
+ /**
+ * The value representing execution on an accelerator device (e.g. Xeon Phi) via OpenCL.
+ */
+ ACC;
static EXECUTION_MODE getDefaultExecutionMode() {
EXECUTION_MODE defaultExecutionMode = OpenCLLoader.isOpenCLAvailable() ? GPU : JTP;
@@ -446,7 +451,7 @@ public abstract class Kernel implements Cloneable {
static boolean anyOpenCL(LinkedHashSet<EXECUTION_MODE> _executionModes) {
for (final EXECUTION_MODE mode : _executionModes) {
- if ((mode == GPU) || (mode == CPU)) {
+ if ((mode == GPU) || (mode == ACC) || (mode == CPU)) {
return true;
}
}
@@ -454,7 +459,7 @@ public abstract class Kernel implements Cloneable {
}
public boolean isOpenCL() {
- return (this == GPU) || (this == CPU);
+ return (this == GPU) || (this == ACC) || (this == CPU);
}
};
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
index 48b335159971e0a8471dc3a1789e57adc47449bc..a4bfcdeb9d6411ce52e9593e41d2fd9f3294a9eb 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
@@ -11,27 +11,47 @@ public abstract class Device{
GPU,
CPU,
JTP,
- SEQ
+ SEQ,
+ ACC
};
+ /**
+ * @return the OpenCL device, of any type, with the most compute units
+ */
public static Device best() {
return (OpenCLDevice.select(new DeviceComparitor(){
@Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
- if (_deviceLhs.getType() != _deviceRhs.getType()) {
- if (_deviceLhs.getType() == TYPE.GPU) {
- return (_deviceLhs);
- } else {
- return (_deviceRhs);
- }
+ if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
+ return (_deviceLhs);
+ } else {
+ return (_deviceRhs);
}
+ }
+ }));
+ }
+ public static Device bestGPU() { // GPU-type device with the most compute units (a later device wins ties); null when no GPU device is found
+ return (OpenCLDevice.select(new DeviceComparitor(){
+ @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
return (_deviceLhs);
} else {
return (_deviceRhs);
}
}
- }));
+ }, Device.TYPE.GPU));
+ }
+
+ public static Device bestACC() { // ACC-type device with the most compute units (a later device wins ties); null when no ACC device is found
+ return (OpenCLDevice.select(new DeviceComparitor(){
+ @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
+ if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
+ return (_deviceLhs);
+ } else {
+ return (_deviceRhs); // also taken on a tie, so the right-hand device wins equal counts
+ }
+ }
+ }, Device.TYPE.ACC));
}
public static Device first(final Device.TYPE _type) {
@@ -51,6 +71,11 @@ public abstract class Device{
}
+ public static Device firstACC() { // convenience wrapper: delegates to first(Device.TYPE.ACC)
+ return (first(Device.TYPE.ACC));
+
+ }
+
protected TYPE type = TYPE.UNKNOWN;
protected int maxWorkGroupSize;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
index 588960586aec5e585d3bcfa9e6f54f8b5e10ce99..61bfe548a2b292191f91de30bc77f74a70a3b615 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
@@ -448,6 +448,25 @@ public class OpenCLDevice extends Device{
return (device);
}
+ public static OpenCLDevice select(DeviceComparitor _deviceComparitor, Device.TYPE _type) { // device of _type preferred by _deviceComparitor across all platforms; null if none matches
+ OpenCLDevice device = null;
+ final OpenCLPlatform platform = new OpenCLPlatform(0, null, null, null); // placeholder instance, used here only to enumerate the platforms
+
+ for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) {
+ for (final OpenCLDevice d : p.getOpenCLDevices()) {
+ if (d.getType() != _type) continue; // ignore devices of any other type
+ if (device == null) {
+ device = d; // first match becomes the initial candidate
+ } else {
+ device = _deviceComparitor.select(device, d); // comparator chooses between the current candidate (lhs) and d (rhs)
+ }
+ }
+ }
+
+ return (device);
+ }
+
+
@Override public String toString() {
final StringBuilder s = new StringBuilder("{");
boolean first = true;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index 4520393007f56e94418ed144cbca8da2cf5a2ca5..d34926d2bb73a4bcf6afa629329bebf9e513336b 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -275,6 +275,18 @@ public abstract class KernelRunnerJNI{
* @author gfrost
*/
// @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
+
+ /**
+ * This 'bit' indicates that we want to execute on the Accelerator.
+ *
+ * Be careful changing final constants starting with JNI.<br/>
+ *
+ * @see com.amd.aparapi.internal.annotation.UsedByJNICode
+ *
+ * @author ekasit
+ */
+ @UsedByJNICode protected static final int JNI_FLAG_USE_ACC = 1 << 5;
+
/*
* Native methods
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index cdb6c9d094007713ad2da09f1219b06e5f491682..ad66f9b39f69d8cc77473941971d7efefcb321b5 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -103,6 +103,8 @@ public class KernelRunner extends KernelRunnerJNI{
private int argc;
+ private boolean isFallBack = false; // If isFallBack, rebuild the kernel (necessary?)
+
private static final ForkJoinWorkerThreadFactory lowPriorityThreadFactory = new ForkJoinWorkerThreadFactory(){
@Override public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
ForkJoinWorkerThread newThread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
@@ -984,6 +986,7 @@ public class KernelRunner extends KernelRunnerJNI{
}
synchronized private Kernel fallBackAndExecute(String _entrypointName, final Range _range, final int _passes) {
+ isFallBack = true;
if (kernel.hasNextExecutionMode()) {
kernel.tryNextExecutionMode();
} else {
@@ -996,14 +999,14 @@ public class KernelRunner extends KernelRunnerJNI{
synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes,
Exception _exception) {
if (logger.isLoggable(Level.WARNING)) {
- logger.warning("Reverting to Java Thread Pool (JTP) for " + kernel.getClass() + ": " + _exception.getMessage());
+ logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _exception.getMessage());
_exception.printStackTrace();
}
return fallBackAndExecute(_entrypointName, _range, _passes);
}
synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes, String _excuse) {
- logger.warning("Reverting to Java Thread Pool (JTP) for " + kernel.getClass() + ": " + _excuse);
+ logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _excuse);
return fallBackAndExecute(_entrypointName, _range, _passes);
}
@@ -1023,12 +1026,14 @@ public class KernelRunner extends KernelRunnerJNI{
Device device = _range.getDevice();
if ((device == null) || (device instanceof OpenCLDevice)) {
- if (entryPoint == null) {
- try {
- final ClassModel classModel = ClassModel.createClassModel(kernel.getClass());
- entryPoint = classModel.getEntrypoint(_entrypointName, kernel);
- } catch (final Exception exception) {
- return warnFallBackAndExecute(_entrypointName, _range, _passes, exception);
+ if ((entryPoint == null) || (isFallBack)) {
+ if (entryPoint == null) {
+ try {
+ final ClassModel classModel = ClassModel.createClassModel(kernel.getClass());
+ entryPoint = classModel.getEntrypoint(_entrypointName, kernel);
+ } catch (final Exception exception) {
+ return warnFallBackAndExecute(_entrypointName, _range, _passes, exception);
+ }
}
if ((entryPoint != null) && !entryPoint.shouldFallback()) {
@@ -1042,10 +1047,19 @@ public class KernelRunner extends KernelRunnerJNI{
int jniFlags = 0;
if (openCLDevice == null) {
if (kernel.getExecutionMode().equals(EXECUTION_MODE.GPU)) {
- // We used to treat as before by getting first GPU device
- // now we get the best GPU
- openCLDevice = (OpenCLDevice) OpenCLDevice.best();
+ // Get the best GPU
+ openCLDevice = (OpenCLDevice) OpenCLDevice.bestGPU();
jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now.
+ if (openCLDevice == null) {
+ return warnFallBackAndExecute(_entrypointName, _range, _passes, "GPU request can't be honored");
+ }
+ } else if (kernel.getExecutionMode().equals(EXECUTION_MODE.ACC)) {
+ // Get the best ACC
+ openCLDevice = (OpenCLDevice) OpenCLDevice.bestACC();
+ jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now.
+ if (openCLDevice == null) {
+ return warnFallBackAndExecute(_entrypointName, _range, _passes, "ACC request can't be honored");
+ }
} else {
// We fetch the first CPU device
openCLDevice = (OpenCLDevice) OpenCLDevice.firstCPU();
@@ -1054,9 +1068,11 @@ public class KernelRunner extends KernelRunnerJNI{
"CPU request can't be honored not CPU device");
}
}
- } else {
+ } else { // else of: if (openCLDevice == null)
if (openCLDevice.getType() == Device.TYPE.GPU) {
jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now.
+ } else if (openCLDevice.getType() == Device.TYPE.ACC) {
+ jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now.
}
}
@@ -1255,24 +1271,26 @@ public class KernelRunner extends KernelRunnerJNI{
try {
executeOpenCL(_entrypointName, _range, _passes);
+ isFallBack = false;
} catch (final AparapiException e) {
warnFallBackAndExecute(_entrypointName, _range, _passes, e);
}
- } else {
+ } else { // else of: if ((entryPoint != null) && !entryPoint.shouldFallback())
warnFallBackAndExecute(_entrypointName, _range, _passes, "failed to locate entrypoint");
}
- } else {
+ } else { // else of: if ((entryPoint == null) || (isFallBack))
try {
executeOpenCL(_entrypointName, _range, _passes);
+ isFallBack = false;
} catch (final AparapiException e) {
warnFallBackAndExecute(_entrypointName, _range, _passes, e);
}
}
- } else {
+ } else { // else of: if ((device == null) || (device instanceof OpenCLDevice))
warnFallBackAndExecute(_entrypointName, _range, _passes,
"OpenCL was requested but Device supplied was not an OpenCLDevice");
}
- } else {
+ } else { // else of: if (kernel.getExecutionMode().isOpenCL())
executeJava(_range, _passes);
}
@@ -1387,14 +1405,16 @@ public class KernelRunner extends KernelRunnerJNI{
*/
public void get(Object array) {
if (explicit
- && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+ && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU)
+ || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
// Only makes sense when we are using OpenCL
getJNI(jniContextHandle, array);
}
}
public List<ProfileInfo> getProfileInfo() {
- if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+ if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel
+ .getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
// Only makes sense when we are using OpenCL
return (getProfileInfoJNI(jniContextHandle));
} else {
@@ -1419,7 +1439,8 @@ public class KernelRunner extends KernelRunnerJNI{
public void put(Object array) {
if (explicit
- && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+ && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU)
+ || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
// Only makes sense when we are using OpenCL
puts.add(array);
}
diff --git a/samples/info/src/com/amd/aparapi/sample/info/Main.java b/samples/info/src/com/amd/aparapi/sample/info/Main.java
index 9ed78b3c654d9db7231cb0ae264ffd9c1225c8be..fcff248937d1be7a55fed94e9bf5a047ca6ece9e 100644
--- a/samples/info/src/com/amd/aparapi/sample/info/Main.java
+++ b/samples/info/src/com/amd/aparapi/sample/info/Main.java
@@ -115,6 +115,48 @@ public class Main{
System.out.println("}");
}
+ Device bestGPU = OpenCLDevice.bestGPU();
+ if (bestGPU == null) {
+ System.out.println("OpenCLDevice.bestGPU() returned null!");
+ } else {
+ System.out.println("OpenCLDevice.bestGPU() returned { ");
+ System.out.println(" Type : " + bestGPU.getType());
+ System.out.println(" GlobalMemSize : " + ((OpenCLDevice) bestGPU).getGlobalMemSize());
+ System.out.println(" LocalMemSize : " + ((OpenCLDevice) bestGPU).getLocalMemSize());
+ System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) bestGPU).getMaxComputeUnits());
+ System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) bestGPU).getMaxWorkGroupSize());
+ System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) bestGPU).getMaxWorkItemDimensions());
+ System.out.println("}");
+ }
+
+ Device firstACC = OpenCLDevice.firstACC();
+ if (firstACC == null) {
+ System.out.println("OpenCLDevice.firstACC() returned null!");
+ } else {
+ System.out.println("OpenCLDevice.firstACC() returned { ");
+ System.out.println(" Type : " + firstACC.getType());
+ System.out.println(" GlobalMemSize : " + ((OpenCLDevice) firstACC).getGlobalMemSize());
+ System.out.println(" LocalMemSize : " + ((OpenCLDevice) firstACC).getLocalMemSize());
+ System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) firstACC).getMaxComputeUnits());
+ System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) firstACC).getMaxWorkGroupSize());
+ System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) firstACC).getMaxWorkItemDimensions());
+ System.out.println("}");
+ }
+
+ Device bestACC = OpenCLDevice.bestACC();
+ if (bestACC == null) {
+ System.out.println("OpenCLDevice.bestACC() returned null!");
+ } else {
+ System.out.println("OpenCLDevice.bestACC() returned { ");
+ System.out.println(" Type : " + bestACC.getType());
+ System.out.println(" GlobalMemSize : " + ((OpenCLDevice) bestACC).getGlobalMemSize());
+ System.out.println(" LocalMemSize : " + ((OpenCLDevice) bestACC).getLocalMemSize());
+ System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) bestACC).getMaxComputeUnits());
+ System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) bestACC).getMaxWorkGroupSize());
+ System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) bestACC).getMaxWorkItemDimensions());
+ System.out.println("}");
+ }
+
}
}
diff --git a/samples/squares/squares.sh b/samples/squares/squares.sh
index 3fe88051b4ec2091322246c7380dc752575f0b76..bc469f262992630b971b8880ca6c716913bfef43 100644
--- a/samples/squares/squares.sh
+++ b/samples/squares/squares.sh
@@ -1,5 +1,5 @@
java \
-Djava.library.path=../../com.amd.aparapi.jni/dist \
- -Dcom.amd.aparapi.executionMode=%1 \
+ -Dcom.amd.aparapi.executionMode=$1 \
-classpath ../../com.amd.aparapi/dist/aparapi.jar:squares.jar \
com.amd.aparapi.sample.squares.Main