diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml index 0ea10815afe0cd76afee85172bc42436ad1b4966..035bc928bbeb0a01880f7613fa7cc47b23c6534c 100644 --- a/com.amd.aparapi.jni/build.xml +++ b/com.amd.aparapi.jni/build.xml @@ -73,6 +73,55 @@ First consider editing the properties in build.properties <echo message="amd.app.sdk.dir ${amd.app.sdk.dir}"/> + <available property="linux.intel.app.sdk.exists" file="/opt/intel/opencl" type="dir"/> + <condition property="intel.app.sdk.dir" value="/opt/intel/opencl"> + <and> + <os family="unix" /> + <not> + <os family="mac" /> + </not> + <isset property="linux.intel.app.sdk.exists" /> + <not> + <isset property="win32.amd.app.sdk.exists" /> + </not> + <not> + <isset property="win64.amd.app.sdk.exists" /> + </not> + </and> + </condition> + + <echo message=" intel.app.sdk.dir ${intel.app.sdk.dir}"/> + + <condition property="vendor.name" value="amd"> + <isset property="amd.app.sdk.dir" /> + </condition> + + <condition property="vendor.name" value="intel"> + <and> + <isset property="intel.app.sdk.dir" /> + <not> + <isset property="amd.app.sdk.dir" /> + </not> + </and> + </condition> + + <echo message=" vendor.name ${vendor.name}"/> + + <condition property="app.sdk.dir" value="${amd.app.sdk.dir}"> + <isset property="amd.app.sdk.dir" /> + </condition> + + <condition property="app.sdk.dir" value="${intel.app.sdk.dir}"> + <and> + <isset property="intel.app.sdk.dir" /> + <not> + <isset property="app.sdk.dir" /> + </not> + </and> + </condition> + + <echo message="app.sdk.dir ${app.sdk.dir}"/> + <!-- Check for Visual Studio Compiler --> <!-- This needs to be in descending order to properly handle multiple installations --> <available property="msvc.32.12.0.exists" file="C:/Program Files/Microsoft Visual Studio 12.0\VC\bin" type="dir"/> @@ -321,6 +370,12 @@ First consider editing the properties in build.properties <condition property="optional.x64.subdir" value="" else="x64\"> <equals arg1="${x86_or_x86_64}" 
arg2="x86"/> </condition> + + + <condition property="optional.app.sdk.lib.subdir" value="lib\" else="lib64\"> + <equals arg1="${x86_or_x86_64}" arg2="x86"/> + + </condition> <condition property="gcc.m.value" value="32" else="64"> <equals arg1="${x86_or_x86_64}" arg2="x86"/> @@ -352,18 +407,18 @@ First consider editing the properties in build.properties <os family="mac" /> </not> <not> - <isset property="amd.app.sdk.dir" /> + <isset property="app.sdk.dir" /> </not> </and> </condition> <![CDATA[ You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code - You need to set amd.app.sdk.dir to point to the location where AMD APP SDK is installed + You need to set app.sdk.dir to point to the location where OpenCL SDK is installed ]]> </fail> - <available file="${amd.app.sdk.dir}" type="dir" property="amd.app.sdk.dir.exists" /> + <available file="${app.sdk.dir}" type="dir" property="app.sdk.dir.exists" /> <fail message="Error:"> <condition> @@ -372,14 +427,14 @@ First consider editing the properties in build.properties <os family="mac" /> </not> <not> - <isset property="amd.app.sdk.dir.exists" /> + <isset property="app.sdk.dir.exists" /> </not> </and> </condition> <![CDATA[ You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code - At present amd.app.sdk.dir is set (to ${amd.app.sdk.dir}) but that dir does not exist + At present app.sdk.dir is set (to ${app.sdk.dir}) but that dir does not exist ]]> </fail> </target> @@ -458,7 +513,7 @@ First consider editing the properties in build.properties <arg value="-I${java.home}/../include" /> <arg value="-I${java.home}/../include/linux" /> <arg value="-Iinclude" /> - <arg value="-I${amd.app.sdk.dir}/include" /> + <arg value="-I${app.sdk.dir}/include" /> <arg value="-Isrc/cpp" /> <arg value="-Isrc/cpp/runKernel" /> <arg value="-Isrc/cpp/invoke" /> @@ -480,7 +535,8 @@ First consider editing the properties in build.properties <arg value="src/cpp/classtools.cpp" /> <arg 
value="src/cpp/JNIHelper.cpp" /> <arg value="src/cpp/agent.cpp" /> - <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" /> + <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" /> + <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" /> <arg value="-lOpenCL" /> </exec> </target> @@ -543,7 +599,7 @@ First consider editing the properties in build.properties <arg value="/I${java.home}\..\include" /> <arg value="/I${java.home}\..\include\win32" /> <arg value="/Iinclude" /> - <arg value="/I${amd.app.sdk.dir}\include" /> + <arg value="/I${app.sdk.dir}\include" /> <arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\shared" /> <arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\um" /> <arg value="/Isrc/cpp" /> @@ -568,7 +624,8 @@ First consider editing the properties in build.properties <arg value="/link" /> <arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" /> <arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" /> - <arg value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" /> + <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" /> + <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" /> <arg value="/libpath:C:\Program Files (x86)\Windows Kits\8.0\Lib\win8\um\x64" /> <arg value="OpenCL.lib" /> <arg value="/out:${basedir}\dist\aparapi_${x86_or_x86_64}.dll" /> @@ -588,7 +645,7 @@ First consider editing the properties in build.properties <arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/> <arg value="/I${msvc.dir}\vc\include" /> <arg value="/I${msvc.sdk.dir}\include" /> - <arg value="/I${amd.app.sdk.dir}\include" /> + <arg value="/I${app.sdk.dir}\include" /> <arg value="/Isrc/cpp" /> <arg value="/Isrc/cpp/runKernel" /> <arg value="/Isrc/cpp/invoke" /> @@ -596,7 +653,8 @@ First consider editing the properties in build.properties <arg value="/link" /> <arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" /> <arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" /> - <arg 
value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" /> + <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" /> + <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" /> <arg value="OpenCL.lib" /> <arg value="/out:${basedir}/dist/cltest_${x86_or_x86_64}.exe" /> </exec> @@ -637,9 +695,10 @@ First consider editing the properties in build.properties <arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/> <arg value="-I${java.home}/../include" /> <arg value="-I${java.home}/../include/linux" /> - <arg value="-I${amd.app.sdk.dir}/include" /> + <arg value="-I${app.sdk.dir}/include" /> <arg value="src/cpp/cltest.cpp" /> - <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" /> + <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" /> + <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" /> <arg value="-lOpenCL" /> <arg value="-o" /> <arg value="${basedir}/cltest_${x86_or_x86_64}" /> diff --git a/com.amd.aparapi.jni/src/cpp/cltest.cpp b/com.amd.aparapi.jni/src/cpp/cltest.cpp index 9604dbeb9e44ea0677b44887185810f4193b04a4..9e7c90b8ef7cc9346ca16049a683c7bcd8f6a04c 100644 --- a/com.amd.aparapi.jni/src/cpp/cltest.cpp +++ b/com.amd.aparapi.jni/src/cpp/cltest.cpp @@ -126,7 +126,7 @@ int main(int argc, char **argv){ fprintf(stderr, " CL_PLATFORM_VERSION.\"%s\"\n", platformVersionName); fprintf(stderr, " CL_PLATFORM_NAME....\"%s\"\n", platformName); cl_uint deviceIdc; - cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ; + cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR; status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc); fprintf(stderr, " Platform %d has %d device%s{\n", platformIdx, deviceIdc, ((deviceIdc==1)?"":"s")); if (status == CL_SUCCESS && deviceIdc >0 ){ diff --git a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp index 
f35bcd12bbaf4a23f4f5cc015e36c945a715c342..ccfa62bfbae9254f8821dac1fa436380efeb6695 100644 --- a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp +++ b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp @@ -461,7 +461,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms) JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance); cl_uint deviceIdc; - cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ; + cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR; status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc); if (status == CL_SUCCESS && deviceIdc > 0 ){ cl_device_id* deviceIds = new cl_device_id[deviceIdc]; @@ -489,7 +489,8 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms) } if (deviceType & CL_DEVICE_TYPE_ACCELERATOR) { deviceType &= ~CL_DEVICE_TYPE_ACCELERATOR; - fprintf(stderr, "Accelerator "); + //fprintf(stderr, "Accelerator "); + deviceTypeEnumInstance = JNIHelper::getStaticFieldObject(jenv, DeviceTypeClass, "ACC", DeviceTypeClassArg); } //fprintf(stderr, "(0x%llx) ", deviceType); //fprintf(stderr, "\n"); diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp index db9a5cfa498bf8a51b54915f52bf8ad7a310cc35..1b13151963aa66872aaefb2e630b808a43837129 100644 --- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp +++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp @@ -13,6 +13,8 @@ JNIContext::JNIContext(JNIEnv *jenv, jobject _kernelObject, jobject _openCLDevic deviceType(((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)?CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU), profileFile(NULL), valid(JNI_FALSE){ + if (flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC) + deviceType = CL_DEVICE_TYPE_ACCELERATOR; cl_int status = CL_SUCCESS; jobject 
platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject); cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance); diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h index e22c5ff418b446c83ab6730b089240803e2d1370..aebad48a54ef7767be8694fca9165a4c03b47cdd 100644 --- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h +++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h @@ -51,6 +51,12 @@ public: return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE); } + jboolean isUsingACC(){ + //I'm pretty sure that this is equivalent to: + //return flags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC; + return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC?JNI_TRUE:JNI_FALSE); + } + ~JNIContext(){ } diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java index 8f9df4d32b367539422d6c4e586c7f4edb256046..339ee89e9e482130aa26c93cd1904f72d3026460 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java @@ -78,7 +78,7 @@ public class Config extends ConfigJNI{ /** * Allows the user to request a specific Kernel.EXECUTION_MODE enum value for all Kernels. 
* - * Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU} + * Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU|ACC} * * @see com.amd.aparapi.Kernel.EXECUTION_MODE */ @@ -188,7 +188,7 @@ public class Config extends ConfigJNI{ } if (dumpFlags) { - System.out.println(propPkgName + ".executionMode{GPU|CPU|JTP|SEQ}=" + executionMode); + System.out.println(propPkgName + ".executionMode{GPU|ACC|CPU|JTP|SEQ}=" + executionMode); System.out.println(propPkgName + ".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}=" + logger.getLevel()); System.out.println(propPkgName + ".enableProfiling{true|false}=" + enableProfiling); System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV); diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java index 0069d4e46fcb97d834b5a4c5af48e3d84dc710f9..e01dba3f29b6fbb83bb7fd1f1ace19e805c447a0 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java @@ -302,11 +302,12 @@ public abstract class Kernel implements Cloneable { * determine how it executed. * * <p> - * Aparapi supports 4 execution modes. + * Aparapi supports 5 execution modes. Default is GPU. * <ul> * <table> * <tr><th align="left">Enum value</th><th align="left">Execution</th></tr> * <tr><td><code><b>GPU</b></code></td><td>Execute using OpenCL on first available GPU device</td></tr> + * <tr><td><code><b>ACC</b></code></td><td>Execute using OpenCL on first available Accelerator device</td></tr> * <tr><td><code><b>CPU</b></code></td><td>Execute using OpenCL on first available CPU device</td></tr> * <tr><td><code><b>JTP</b></code></td><td>Execute using a Java Thread Pool (one thread spawned per available core)</td></tr> * <tr><td><code><b>SEQ</b></code></td><td>Execute using a single loop. 
This is useful for debugging but will be less @@ -325,7 +326,7 @@ public abstract class Kernel implements Cloneable { * kernel.execute(values.length); * </pre></blockquote> * <p> - * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,CPU,SEQ</code> + * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,ACC,CPU,SEQ</code> * when an application is launched. * <p><blockquote><pre> * java -classpath ....;aparapi.jar -Dcom.amd.aparapi.executionMode=GPU MyApplication @@ -365,7 +366,11 @@ public abstract class Kernel implements Cloneable { * <p> * This is meant to be used for debugging a kernel. */ - SEQ; + SEQ, + /** + * The value representing execution on an accelerator device (Xeon Phi) via OpenCL. + */ + ACC; static EXECUTION_MODE getDefaultExecutionMode() { EXECUTION_MODE defaultExecutionMode = OpenCLLoader.isOpenCLAvailable() ? GPU : JTP; @@ -442,7 +447,7 @@ public abstract class Kernel implements Cloneable { static boolean anyOpenCL(LinkedHashSet<EXECUTION_MODE> _executionModes) { for (final EXECUTION_MODE mode : _executionModes) { - if ((mode == GPU) || (mode == CPU)) { + if ((mode == GPU) || (mode == ACC) || (mode == CPU)) { return true; } } @@ -450,7 +455,7 @@ public abstract class Kernel implements Cloneable { } public boolean isOpenCL() { - return (this == GPU) || (this == CPU); + return (this == GPU) || (this == ACC) || (this == CPU); } }; diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java index 48b335159971e0a8471dc3a1789e57adc47449bc..a4bfcdeb9d6411ce52e9593e41d2fd9f3294a9eb 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java @@ -11,27 +11,47 @@ public abstract class Device{ GPU, CPU, JTP, - SEQ + SEQ, + ACC }; + /** + * @return Now return the device of any types having the maximum 
compute units + */ public static Device best() { return (OpenCLDevice.select(new DeviceComparitor(){ @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) { - if (_deviceLhs.getType() != _deviceRhs.getType()) { - if (_deviceLhs.getType() == TYPE.GPU) { - return (_deviceLhs); - } else { - return (_deviceRhs); - } + if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) { + return (_deviceLhs); + } else { + return (_deviceRhs); } + } + })); + } + public static Device bestGPU() { + return (OpenCLDevice.select(new DeviceComparitor(){ + @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) { if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) { return (_deviceLhs); } else { return (_deviceRhs); } } - })); + }, Device.TYPE.GPU)); + } + + public static Device bestACC() { + return (OpenCLDevice.select(new DeviceComparitor(){ + @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) { + if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) { + return (_deviceLhs); + } else { + return (_deviceRhs); + } + } + }, Device.TYPE.ACC)); } public static Device first(final Device.TYPE _type) { @@ -51,6 +71,11 @@ public abstract class Device{ } + public static Device firstACC() { + return (first(Device.TYPE.ACC)); + + } + protected TYPE type = TYPE.UNKNOWN; protected int maxWorkGroupSize; diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java index 588960586aec5e585d3bcfa9e6f54f8b5e10ce99..61bfe548a2b292191f91de30bc77f74a70a3b615 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java @@ -448,6 +448,25 @@ public class OpenCLDevice extends Device{ return (device); } + public static OpenCLDevice select(DeviceComparitor _deviceComparitor, Device.TYPE _type) 
{ + OpenCLDevice device = null; + final OpenCLPlatform platform = new OpenCLPlatform(0, null, null, null); + + for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) { + for (final OpenCLDevice d : p.getOpenCLDevices()) { + if (d.getType() != _type) continue; + if (device == null) { + device = d; + } else { + device = _deviceComparitor.select(device, d); + } + } + } + + return (device); + } + + @Override public String toString() { final StringBuilder s = new StringBuilder("{"); boolean first = true; diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java index 4520393007f56e94418ed144cbca8da2cf5a2ca5..d34926d2bb73a4bcf6afa629329bebf9e513336b 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java @@ -275,6 +275,18 @@ public abstract class KernelRunnerJNI{ * @author gfrost */ // @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4; + + /** + * This 'bit' indicates that we want to execute on the Accelerator. 
+ * + * Be careful changing final constants starting with JNI.<br/> + * + * @see com.amd.aparapi.internal.annotation.UsedByJNICode + * + * @author ekasit + */ + @UsedByJNICode protected static final int JNI_FLAG_USE_ACC = 1 << 5; + /* * Native methods diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java index 821364c85bccf5906c55b4aae45855a5e66b9d8c..e51c9ef59bdc0d0dfdb8b4b71d9a0fee084c5452 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java @@ -100,6 +100,8 @@ public class KernelRunner extends KernelRunnerJNI{ private Entrypoint entryPoint; private int argc; + + private boolean isFallBack = false; // If isFallBack, rebuild the kernel (necessary?) private final ExecutorService threadPool = Executors.newCachedThreadPool(); /** @@ -902,6 +904,7 @@ public class KernelRunner extends KernelRunnerJNI{ } synchronized private Kernel fallBackAndExecute(String _entrypointName, final Range _range, final int _passes) { + isFallBack = true; if (kernel.hasNextExecutionMode()) { kernel.tryNextExecutionMode(); } else { @@ -914,14 +917,14 @@ public class KernelRunner extends KernelRunnerJNI{ synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes, Exception _exception) { if (logger.isLoggable(Level.WARNING)) { - logger.warning("Reverting to Java Thread Pool (JTP) for " + kernel.getClass() + ": " + _exception.getMessage()); + logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _exception.getMessage()); _exception.printStackTrace(); } return fallBackAndExecute(_entrypointName, _range, _passes); } synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes, String _excuse) { - logger.warning("Reverting to Java Thread 
Pool (JTP) for " + kernel.getClass() + ": " + _excuse); + logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _excuse); return fallBackAndExecute(_entrypointName, _range, _passes); } @@ -941,12 +944,14 @@ public class KernelRunner extends KernelRunnerJNI{ Device device = _range.getDevice(); if ((device == null) || (device instanceof OpenCLDevice)) { - if (entryPoint == null) { - try { - final ClassModel classModel = new ClassModel(kernel.getClass()); - entryPoint = classModel.getEntrypoint(_entrypointName, kernel); - } catch (final Exception exception) { - return warnFallBackAndExecute(_entrypointName, _range, _passes, exception); + if ((entryPoint == null) || (isFallBack)) { + if (entryPoint == null) { + try { + final ClassModel classModel = new ClassModel(kernel.getClass()); + entryPoint = classModel.getEntrypoint(_entrypointName, kernel); + } catch (final Exception exception) { + return warnFallBackAndExecute(_entrypointName, _range, _passes, exception); + } } if ((entryPoint != null) && !entryPoint.shouldFallback()) { @@ -960,10 +965,19 @@ public class KernelRunner extends KernelRunnerJNI{ int jniFlags = 0; if (openCLDevice == null) { if (kernel.getExecutionMode().equals(EXECUTION_MODE.GPU)) { - // We used to treat as before by getting first GPU device - // now we get the best GPU - openCLDevice = (OpenCLDevice) OpenCLDevice.best(); + // Get the best GPU + openCLDevice = (OpenCLDevice) OpenCLDevice.bestGPU(); jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. + if (openCLDevice == null) { + return warnFallBackAndExecute(_entrypointName, _range, _passes, "GPU request can't be honored"); + } + } else if (kernel.getExecutionMode().equals(EXECUTION_MODE.ACC)) { + // Get the best ACC + openCLDevice = (OpenCLDevice) OpenCLDevice.bestACC(); + jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. 
+ if (openCLDevice == null) { + return warnFallBackAndExecute(_entrypointName, _range, _passes, "ACC request can't be honored"); + } } else { // We fetch the first CPU device openCLDevice = (OpenCLDevice) OpenCLDevice.firstCPU(); @@ -972,9 +986,11 @@ public class KernelRunner extends KernelRunnerJNI{ "CPU request can't be honored not CPU device"); } } - } else { + } else { // openCLDevice == null if (openCLDevice.getType() == Device.TYPE.GPU) { jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. + } else if (openCLDevice.getType() == Device.TYPE.ACC) { + jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. } } @@ -1019,6 +1035,7 @@ public class KernelRunner extends KernelRunnerJNI{ && hasGlobalInt32ExtendedAtomicsSupport() && hasLocalInt32BaseAtomicsSupport() && hasLocalInt32ExtendedAtomicsSupport(); + if (entryPoint.requiresAtomic32Pragma() && !all32AtomicsAvailable) { return warnFallBackAndExecute(_entrypointName, _range, _passes, "32 bit Atomics required but not supported"); @@ -1162,11 +1179,11 @@ public class KernelRunner extends KernelRunnerJNI{ } i++; - } + } // at this point, i = the actual used number of arguments // (private buffers do not get treated as arguments) - + argc = i; setArgsJNI(jniContextHandle, args, argc); @@ -1175,24 +1192,26 @@ public class KernelRunner extends KernelRunnerJNI{ try { executeOpenCL(_entrypointName, _range, _passes); + isFallBack = false; } catch (final AparapiException e) { warnFallBackAndExecute(_entrypointName, _range, _passes, e); } - } else { + } else { // (entryPoint != null) && !entryPoint.shouldFallback() warnFallBackAndExecute(_entrypointName, _range, _passes, "failed to locate entrypoint"); } - } else { + } else { // (entryPoint == null) || (isFallBack) try { executeOpenCL(_entrypointName, _range, _passes); + isFallBack = false; } catch (final AparapiException e) { warnFallBackAndExecute(_entrypointName, _range, _passes, e); } } - } else { + } else { // (device == null) || (device 
instanceof OpenCLDevice) warnFallBackAndExecute(_entrypointName, _range, _passes, "OpenCL was requested but Device supplied was not an OpenCLDevice"); } - } else { + } else { // kernel.getExecutionMode().isOpenCL() executeJava(_range, _passes); } @@ -1308,14 +1327,14 @@ public class KernelRunner extends KernelRunnerJNI{ */ public void get(Object array) { if (explicit - && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { + && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { // Only makes sense when we are using OpenCL getJNI(jniContextHandle, array); } } public List<ProfileInfo> getProfileInfo() { - if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { + if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { // Only makes sense when we are using OpenCL return (getProfileInfoJNI(jniContextHandle)); } else { @@ -1340,7 +1359,7 @@ public class KernelRunner extends KernelRunnerJNI{ public void put(Object array) { if (explicit - && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { + && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) { // Only makes sense when we are using OpenCL puts.add(array); } diff --git a/samples/info/src/com/amd/aparapi/sample/info/Main.java b/samples/info/src/com/amd/aparapi/sample/info/Main.java index 9ed78b3c654d9db7231cb0ae264ffd9c1225c8be..fcff248937d1be7a55fed94e9bf5a047ca6ece9e 100644 --- 
a/samples/info/src/com/amd/aparapi/sample/info/Main.java +++ b/samples/info/src/com/amd/aparapi/sample/info/Main.java @@ -115,6 +115,48 @@ public class Main{ System.out.println("}"); } + Device bestGPU = OpenCLDevice.bestGPU(); + if (bestGPU == null) { + System.out.println("OpenCLDevice.bestGPU() returned null!"); + } else { + System.out.println("OpenCLDevice.bestGPU() returned { "); + System.out.println(" Type : " + bestGPU.getType()); + System.out.println(" GlobalMemSize : " + ((OpenCLDevice) bestGPU).getGlobalMemSize()); + System.out.println(" LocalMemSize : " + ((OpenCLDevice) bestGPU).getLocalMemSize()); + System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) bestGPU).getMaxComputeUnits()); + System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) bestGPU).getMaxWorkGroupSize()); + System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) bestGPU).getMaxWorkItemDimensions()); + System.out.println("}"); + } + + Device firstACC = OpenCLDevice.firstACC(); + if (firstACC == null) { + System.out.println("OpenCLDevice.firstACC() returned null!"); + } else { + System.out.println("OpenCLDevice.firstACC() returned { "); + System.out.println(" Type : " + firstACC.getType()); + System.out.println(" GlobalMemSize : " + ((OpenCLDevice) firstACC).getGlobalMemSize()); + System.out.println(" LocalMemSize : " + ((OpenCLDevice) firstACC).getLocalMemSize()); + System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) firstACC).getMaxComputeUnits()); + System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) firstACC).getMaxWorkGroupSize()); + System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) firstACC).getMaxWorkItemDimensions()); + System.out.println("}"); + } + + Device bestACC = OpenCLDevice.bestACC(); + if (bestACC == null) { + System.out.println("OpenCLDevice.bestACC() returned null!"); + } else { + System.out.println("OpenCLDevice.bestACC() returned { "); + System.out.println(" Type : " + bestACC.getType()); + System.out.println(" 
GlobalMemSize : " + ((OpenCLDevice) bestACC).getGlobalMemSize()); + System.out.println(" LocalMemSize : " + ((OpenCLDevice) bestACC).getLocalMemSize()); + System.out.println(" MaxComputeUnits : " + ((OpenCLDevice) bestACC).getMaxComputeUnits()); + System.out.println(" MaxWorkGroupSizes : " + ((OpenCLDevice) bestACC).getMaxWorkGroupSize()); + System.out.println(" MaxWorkItemDimensions : " + ((OpenCLDevice) bestACC).getMaxWorkItemDimensions()); + System.out.println("}"); + } + } } diff --git a/samples/squares/squares.sh b/samples/squares/squares.sh index 3fe88051b4ec2091322246c7380dc752575f0b76..bc469f262992630b971b8880ca6c716913bfef43 100644 --- a/samples/squares/squares.sh +++ b/samples/squares/squares.sh @@ -1,5 +1,5 @@ java \ -Djava.library.path=../../com.amd.aparapi.jni/dist \ - -Dcom.amd.aparapi.executionMode=%1 \ + -Dcom.amd.aparapi.executionMode=$1 \ -classpath ../../com.amd.aparapi/dist/aparapi.jar:squares.jar \ com.amd.aparapi.sample.squares.Main