diff --git a/com.amd.aparapi.jni/src/cpp/CLHelper.cpp b/com.amd.aparapi.jni/src/cpp/CLHelper.cpp
index 1d0752e7d23bef40876cdc5dd393884b3737566b..acf91311e665af7a9d7888b8ed37a0795cf4c3d5 100644
--- a/com.amd.aparapi.jni/src/cpp/CLHelper.cpp
+++ b/com.amd.aparapi.jni/src/cpp/CLHelper.cpp
@@ -40,6 +40,8 @@
#include "CLHelper.h"
#include "List.h"
#include <map>
+#include <vector>
+#include <stdio.h>
void setMap(std::map<cl_int, const char*>& errorMap) {
errorMap[CL_SUCCESS] = "success";
@@ -129,14 +131,83 @@ void CLHelper::getBuildErr(JNIEnv *jenv, cl_device_id deviceId, cl_program prog
delete []buildLog;
}
-cl_program CLHelper::compile(JNIEnv *jenv, cl_context context, size_t deviceCount, cl_device_id* deviceIds, jstring source, jstring* log, cl_int* status){
- const char *sourceChars = jenv->GetStringUTFChars(source, NULL);
- size_t sourceSize[] = { strlen(sourceChars) };
- cl_program program = clCreateProgramWithSource(context, 1, &sourceChars, sourceSize, status);
- jenv->ReleaseStringUTFChars(source, sourceChars);
- *status = clBuildProgram(program, deviceCount, deviceIds, NULL, NULL, NULL);
+/**
+ * Creates and builds an OpenCL program for one device, reusing a cached device binary when possible.
+ *
+ * If binaryKey is non-empty and a binary is cached under it, the program is created from that binary
+ * and source is not compiled. Otherwise the program is created from source and, when caching is
+ * enabled and the build succeeds, its binary is stored under binaryKey for later calls.
+ * The cache is held in function-local statics; this function does no locking of its own.
+ *
+ * @param jenv JNI environment used to read the Java strings
+ * @param context OpenCL context in which the program is created
+ * @param deviceId the single device to build for
+ * @param source OpenCL source text
+ * @param binaryKey cache key for the compiled binary; an empty string disables caching
+ * @param log forwarded to getBuildErr when loading the cached binary fails or the build reports CL_BUILD_PROGRAM_FAILURE
+ * @param status receives the result of clBuildProgram
+ * @return the program handle from clCreateProgramWithSource or clCreateProgramWithBinary
+ */
+cl_program CLHelper::compile(JNIEnv *jenv, cl_context context, cl_device_id* deviceId, jstring* source, jstring* binaryKey, jstring* log, cl_int* status){
+ using std::map;
+ using std::vector;
+ using std::string;
+
+ // Cache of device binaries and their lengths, keyed by binaryKey.
+ static map<string, vector<unsigned char *> > src2bin;
+ static map<string, vector<size_t> > src2len;
+
+ const char* sourceChars = jenv->GetStringUTFChars(*source, NULL);
+ const char* keyChars = jenv->GetStringUTFChars(*binaryKey, NULL);
+ string sourceStr(sourceChars);
+ string keyStr(keyChars);
+
+ size_t sourceLength[] = {sourceStr.length()};
+
+ bool cacheDisabled = jenv->GetStringLength(*binaryKey) == 0;
+
+ cl_program program;
+ bool is_built_from_source = false;
+ bool keyNotFound = src2bin.find(keyStr) == src2bin.end();
+
+ if (cacheDisabled || keyNotFound) {
+ is_built_from_source = true;
+ program = clCreateProgramWithSource(context, 1, &sourceChars, sourceLength, status);
+ }
+ else{
+ cl_int binary_status = CL_SUCCESS;
+ cl_int create_status = CL_SUCCESS;
+ program = clCreateProgramWithBinary(context, 1, deviceId, &src2len[keyStr][0], (const unsigned char**)&src2bin[keyStr][0], &binary_status, &create_status);
+ if (binary_status != CL_SUCCESS || create_status != CL_SUCCESS) {
+ getBuildErr(jenv, *deviceId, program, log);
+ }
+ }
+
+ jenv->ReleaseStringUTFChars(*source, sourceChars);
+ jenv->ReleaseStringUTFChars(*binaryKey, keyChars);
+
+ *status = clBuildProgram(program, 1, deviceId, NULL, NULL, NULL);
if(*status == CL_BUILD_PROGRAM_FAILURE) {
- getBuildErr(jenv, *deviceIds, program, log);
+ getBuildErr(jenv, *deviceId, program, log);
+ }
+
+ // Cache the binary only after a successful build. A failed build has no usable binary, and
+ // caching one would make every later call with this key load it instead of compiling.
+ if(is_built_from_source && !cacheDisabled && *status == CL_SUCCESS) {
+ size_t binaryLength = 0;
+ cl_int infoStatus = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binaryLength, NULL);
+ if(infoStatus == CL_SUCCESS && binaryLength > 0) {
+ unsigned char* binary = new unsigned char[binaryLength];
+ infoStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char*), &binary, NULL);
+ if(infoStatus == CL_SUCCESS) {
+ // Map entries are created only here, so a key is present only when its binary is valid.
+ src2bin[keyStr].assign(1, binary);
+ src2len[keyStr].assign(1, binaryLength);
+ }
+ else {
+ delete[] binary;
+ }
+ }
}
return(program);
}
diff --git a/com.amd.aparapi.jni/src/cpp/CLHelper.h b/com.amd.aparapi.jni/src/cpp/CLHelper.h
index d1581e9e297a586e3a2f6ad390c10eeb1040c08f..71f761efe26dfc38163c71c1e07ef69afb923680 100644
--- a/com.amd.aparapi.jni/src/cpp/CLHelper.h
+++ b/com.amd.aparapi.jni/src/cpp/CLHelper.h
@@ -45,7 +45,7 @@ class CLHelper{
public:
static const char *errString(cl_int status);
static void getBuildErr(JNIEnv *jenv, cl_device_id deviceId, cl_program program, jstring *log);
- static cl_program compile(JNIEnv *jenv, cl_context context, size_t deviceCount, cl_device_id* deviceId, jstring source, jstring* log, cl_int *status);
+ static cl_program compile(JNIEnv *jenv, cl_context context, cl_device_id* deviceId, jstring* source, jstring* binaryKey, jstring* log, cl_int *status);
static jstring getExtensions(JNIEnv *jenv, cl_device_id deviceId, cl_int *status);
};
diff --git a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
index ccfa62bfbae9254f8821dac1fa436380efeb6695..b637a390736f83d7d5f5e5d3f0ab8eaf32c51079 100644
--- a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
+++ b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
@@ -88,7 +88,7 @@ void OpenCLRange::fill(JNIEnv *jenv, jobject rangeInstance, jint dims, size_t* o
}
JNI_JAVA(jobject, OpenCLJNI, createProgram)
- (JNIEnv *jenv, jobject jobj, jobject deviceInstance, jstring source) {
+ (JNIEnv *jenv, jobject jobj, jobject deviceInstance, jstring source, jstring binaryKey) {
jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, deviceInstance);
cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
@@ -105,7 +105,7 @@ JNI_JAVA(jobject, OpenCLJNI, createProgram)
jstring log=NULL;
- cl_program program = CLHelper::compile(jenv, context, 1, &deviceId, source, &log, &status);
+ cl_program program = CLHelper::compile(jenv, context, &deviceId, &source, &binaryKey, &log, &status);
cl_command_queue queue = NULL;
if(status == CL_SUCCESS) {
cl_command_queue_properties queue_props = CL_QUEUE_PROFILING_ENABLE;
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
index cbad7e81539f48989f847e32edc5ebf643f2b413..eb404c523e909a00a609b35af1d7edf0d1de23a4 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
@@ -52,7 +52,7 @@
static const int PASS_ID_PREPARING_EXECUTION = -2;
static const int PASS_ID_COMPLETED_EXECUTION = -1;
-static const int CANCEL_STATUS_FALSE = 0;
+static const int CANCEL_STATUS_FALSE = 0;
static const int CANCEL_STATUS_TRUE = 1;
//compiler dependant code
@@ -1198,7 +1198,7 @@ void writeProfile(JNIEnv* jenv, JNIContext* jniContext) {
}
JNI_JAVA(jlong, KernelRunnerJNI, buildProgramJNI)
- (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) {
+ (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source, jstring binaryKey) {
JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
if (jniContext == NULL){
return 0;
@@ -1207,7 +1207,7 @@ JNI_JAVA(jlong, KernelRunnerJNI, buildProgramJNI)
try {
cl_int status = CL_SUCCESS;
- jniContext->program = CLHelper::compile(jenv, jniContext->context, 1, &jniContext->deviceId, source, NULL, &status);
+ jniContext->program = CLHelper::compile(jenv, jniContext->context, &jniContext->deviceId, &source, &binaryKey, NULL, &status);
if(status == CL_BUILD_PROGRAM_FAILURE) throw CLException(status, "");
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index fbae39bb67e433a2983a77dc7f7326ed87897a0e..6f08fbd80f47b505e4f1f480fbb61f9ddb2b5400 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -106,6 +106,14 @@ public class Config extends ConfigJNI{
*/
public static final boolean dumpProfilesOnExit = Boolean.getBoolean(propPkgName + ".dumpProfilesOnExit");
+ /**
+ * Dumps profiling info (for a single execution) after every Kernel execution.
+ *
+ * Usage -Dcom.amd.aparapi.dumpProfileOnExecution={true|false}
+ *
+ */
+ public static final boolean dumpProfileOnExecution = Boolean.getBoolean(propPkgName + ".dumpProfileOnExecution");
+
// Pragma/OpenCL codegen related flags
public static final boolean enableAtomic32 = Boolean.getBoolean(propPkgName + ".enableAtomic32");
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 8bead23faddde914beb74a491a53ad23d1d03864..df9f7c463b4247a398a31ca4367d61da64d79ead 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -464,6 +464,8 @@ public abstract class Kernel implements Cloneable {
private KernelRunner kernelRunner = null;
+ private boolean autoCleanUpArrays = false;
+
private KernelState kernelState = new KernelState();
/**
@@ -2110,6 +2112,33 @@ public abstract class Kernel implements Cloneable {
return prepareKernelRunner().execute(_entrypoint, _range, _passes);
}
+ public boolean isAutoCleanUpArrays() {
+ return autoCleanUpArrays;
+ }
+
+ /**
+ * Property which if true enables automatic calling of {@link #cleanUpArrays()} following each execution.
+ */
+ public void setAutoCleanUpArrays(boolean autoCleanUpArrays) {
+ this.autoCleanUpArrays = autoCleanUpArrays;
+ }
+
+ /**
+ * Frees the bulk of the resources used by this kernel, by setting array sizes in non-primitive {@link KernelArg}s to 1 (0 size is prohibited) and invoking kernel
+ * execution on a zero size range. Unlike {@link #dispose()}, this does not prohibit further invocations of this kernel, as sundry resources such as OpenCL queues are
+ * <b>not</b> freed by this method.
+ *
+ * <p>This allows a "dormant" Kernel to remain in existence without undue strain on GPU resources, which may be strongly preferable to disposing a Kernel and
+ * recreating another one later, as creation/use of a new Kernel (specifically creation of its associated OpenCL context) is expensive.</p>
+ *
+ * <p>Note that where the underlying array field is declared final, it cannot be reassigned and so is not resized.</p>
+ */
+ public synchronized void cleanUpArrays() {
+ if (kernelRunner != null) {
+ kernelRunner.cleanUpArrays();
+ }
+ }
+
/**
* Release any resources associated with this Kernel.
* <p>
@@ -2125,6 +2154,12 @@ public abstract class Kernel implements Cloneable {
}
}
+ /** Automatically releases any resources associated with this Kernel when the Kernel is garbage collected. */
+ @Override
+ protected void finalize() {
+ dispose();
+ }
+
public boolean isRunningCL() {
return getTargetDevice() instanceof OpenCLDevice;
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Range.java b/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
index 75db2c245b680a1e4f4b9d134a07f048292755d6..3d6aef9a7973f0bc089243ef887b2d3da6a0d552 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
@@ -140,6 +140,11 @@ public class Range extends RangeJNI{
public static Range create(Device _device, int _globalWidth) {
final Range withoutLocal = create(_device, _globalWidth, 1);
+ if (_globalWidth == 0) {
+ withoutLocal.setLocalIsDerived(true);
+ return withoutLocal;
+ }
+
if (withoutLocal.isValid()) {
withoutLocal.setLocalIsDerived(true);
final int[] factors = getFactors(withoutLocal.getGlobalSize_0(), withoutLocal.getMaxWorkItemSize()[0]);
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
index 270321ff7c58508e4016eb82dc3afadbabeb1b81..895d1ff9e5f17cdc59deaabb2bf27fb180090cdf 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelArgJNI.java
@@ -1,8 +1,8 @@
package com.amd.aparapi.internal.jni;
-import java.lang.reflect.Field;
+import com.amd.aparapi.internal.annotation.*;
-import com.amd.aparapi.internal.annotation.UsedByJNICode;
+import java.lang.reflect.*;
/**
* This class is intended to be used as a 'proxy' or 'facade' object for Java code to interact with JNI
@@ -12,28 +12,25 @@ public abstract class KernelArgJNI{
/**
* The type of this KernelArg. Created by or-ing appropriate flags
*
- * @see ARG_BOOLEAN
- * @see ARG_BYTE
- * @see ARG_CHAR
- * @see ARG_FLOAT
- * @see ARG_INT
- * @see ARG_DOUBLE
- * @see ARG_LONG
- * @see ARG_SHORT
- * @see ARG_ARRAY
- * @see ARG_PRIMITIVE
- * @see ARG_READ
- * @see ARG_WRITE
- * @see ARG_LOCAL
- * @see ARG_GLOBAL
- * @see ARG_CONSTANT
- * @see ARG_ARRAYLENGTH
- * @see ARG_APARAPI_BUF
- * @see ARG_EXPLICIT
- * @see ARG_EXPLICIT_WRITE
- * @see ARG_OBJ_ARRAY_STRUCT
- * @see ARG_APARAPI_BUF_HAS_ARRAY
- * @see ARG_APARAPI_BUF_IS_DIRECT
+ * @see KernelRunnerJNI#ARG_BOOLEAN
+ * @see KernelRunnerJNI#ARG_BYTE
+ * @see KernelRunnerJNI#ARG_CHAR
+ * @see KernelRunnerJNI#ARG_FLOAT
+ * @see KernelRunnerJNI#ARG_INT
+ * @see KernelRunnerJNI#ARG_DOUBLE
+ * @see KernelRunnerJNI#ARG_LONG
+ * @see KernelRunnerJNI#ARG_SHORT
+ * @see KernelRunnerJNI#ARG_ARRAY
+ * @see KernelRunnerJNI#ARG_PRIMITIVE
+ * @see KernelRunnerJNI#ARG_READ
+ * @see KernelRunnerJNI#ARG_WRITE
+ * @see KernelRunnerJNI#ARG_LOCAL
+ * @see KernelRunnerJNI#ARG_GLOBAL
+ * @see KernelRunnerJNI#ARG_CONSTANT
+ * @see KernelRunnerJNI#ARG_ARRAYLENGTH
+ * @see KernelRunnerJNI#ARG_EXPLICIT
+ * @see KernelRunnerJNI#ARG_EXPLICIT_WRITE
+ * @see KernelRunnerJNI#ARG_OBJ_ARRAY_STRUCT
*/
@UsedByJNICode protected int type;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index 923875ee51bdf4bab77d550bf36f40b06b7883bd..7b83bb9b4ff345fd9caf40452ce706a3ee9ef34e 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -307,7 +307,15 @@ public abstract class KernelRunnerJNI{
protected native int getJNI(long _jniContextHandle, Object _array);
- protected native long buildProgramJNI(long _jniContextHandle, String _source);
+ /**
+ * @param _source The OpenCL source code to compile, which may be sent empty if the binary for that source code is known to be cached on the JNI side
+ * under the key {@code _binaryKey}.
+ * @param _binaryKey A key which embodies a Kernel class and a Device, under which the JNI side will cache the compiled binary corresponding to that Kernel/Device
+ * pair. Once a certain _binaryKey has been passed to this method once, further calls to this method with that key will ignore the _source (which
+ * can be passed empty) and use the cached binary.
+ * <p>By passing an empty String as the _binaryKey, the entire JNI-side binary caching apparatus can be disabled.
+ */
+ protected native long buildProgramJNI(long _jniContextHandle, String _source, String _binaryKey);
protected native int setArgsJNI(long _jniContextHandle, KernelArgJNI[] _args, int argc);
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelDeviceProfile.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelDeviceProfile.java
index 577c13d93e6e5f97740591c1005c4a27ce5921d4..55c4ee5043e9028b9147d2a496a803a79b4af1b4 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelDeviceProfile.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelDeviceProfile.java
@@ -17,6 +17,7 @@ public class KernelDeviceProfile {
private static final int TABLE_COLUMN_HEADER_WIDTH = 21;
private static final int TABLE_COLUMN_COUNT_WIDTH = 8;
private static final int TABLE_COLUMN_WIDTH;
+ private static String tableHeader = null;
private final Class<? extends Kernel> kernel;
private final Device device;
private long[] currentTimes = new long[ProfilingEvent.values().length];
@@ -106,17 +107,20 @@ public class KernelDeviceProfile {
return sum;
}
- public static String getTableHeader() {
- int length = ProfilingEvent.values().length;
- StringBuilder builder = new StringBuilder(150);
- appendRowHeaders(builder, "Device", "Count");
- for (int i = 1; i < length; ++i) {
- ProfilingEvent stage = ProfilingEvent.values()[i];
- String heading = stage.name();
- appendCell(builder, heading);
+ public static synchronized String getTableHeader() {
+ if (tableHeader == null) {
+ int length = ProfilingEvent.values().length;
+ StringBuilder builder = new StringBuilder(150);
+ appendRowHeaders(builder, "Device", "Count");
+ for (int i = 1; i < length; ++i) {
+ ProfilingEvent stage = ProfilingEvent.values()[i];
+ String heading = stage.name();
+ appendCell(builder, heading);
+ }
+ builder.append(" ").append("Total");
+ tableHeader = builder.toString();
}
- builder.append(" ").append("Total");
- return builder.toString();
+ return tableHeader;
}
public String getLastAsTableRow() {
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelManager.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelManager.java
index 3ced0ae0361ff83686972c6c695b8ba6a8d71345..2b5dc2e2af3474649d22b192fdc662e7a0088d86 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelManager.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelManager.java
@@ -47,11 +47,16 @@ public class KernelManager {
/** This method returns a shared instance of a given Kernel subclass. The kernelClass needs a no-args constructor, which
* need not be public.
*
- * <p>Given that compilation of OpenCL is relatively expensive and that (currently!) there is no caching of compiled OpenCL
- * it is desirable to only ever create one instance of any given Kernel subclass, which this method facilitates.</p>
+ * <p>Each new Kernel instance requires a new JNIContext, the creation of which is expensive. There is apparently no simple solution by which a cached JNIContext can be reused
+ * for all instances of a given Kernel class, since it is intimately connected with resource acquisition and release. In the absence of a context caching solution, it is often
+ * highly desirable to only ever create one instance of any given Kernel subclass, which this method facilitates.</p>
*
- * <p>In order to maintain thread saftey, it is necessary to synchronize on the returned kernel for the duration of the process of setting up,
- * executing and extracting the results from that kernel, when using a shared instance.</p>
+ * <p>In order to maintain thread safety when using a shared instance, it is necessary to synchronize on the returned kernel for the duration of the process of setting up,
+ * executing and extracting the results from that kernel.</p>
+ *
+ * <p>This method instantiates a Kernel (per Kernel class) via Reflection, and thus can only be used where the Kernel class has a no-args constructor, which need not be public.
+ * In fact, if a Kernel subclass is designed to be used in conjunction with this method, it is recommended that its <b>only</b> constructor is a <b>private</b> no-args constructor.
+ * </p>
*
* @throws RuntimeException if the class cannot be instantiated
*/
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelProfile.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelProfile.java
index 647ab5133fc3d50fa24247b1508d080eeaafdf7b..3d1caaa11906ae2fcccacbce59050b0d4b8c86c7 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelProfile.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelProfile.java
@@ -12,6 +12,7 @@ import java.util.logging.*;
*/
public class KernelProfile {
+ private static final double MILLION = 1000000d;
private static Logger logger = Logger.getLogger(Config.getLoggerName());
private final Class<? extends Kernel> kernelClass;
private LinkedHashMap<Device, KernelDeviceProfile> deviceProfiles = new LinkedHashMap<>();
@@ -25,12 +26,13 @@ public class KernelProfile {
public double getLastExecutionTime() {
KernelDeviceProfile lastDeviceProfile = getLastDeviceProfile();
- return lastDeviceProfile == null ? Double.NaN : lastDeviceProfile.getLastElapsedTime(ProfilingEvent.START, ProfilingEvent.EXECUTED);
+ return lastDeviceProfile == null ? Double.NaN : lastDeviceProfile.getLastElapsedTime(ProfilingEvent.START, ProfilingEvent.EXECUTED) / MILLION;
}
public double getLastConversionTime() {
KernelDeviceProfile lastDeviceProfile = getLastDeviceProfile();
- return lastDeviceProfile == null ? Double.NaN : lastDeviceProfile.getLastElapsedTime(ProfilingEvent.START, ProfilingEvent.EXECUTED); }
+ return lastDeviceProfile == null ? Double.NaN : lastDeviceProfile.getLastElapsedTime(ProfilingEvent.START, ProfilingEvent.PREPARE_EXECUTE) / MILLION; // measured up to PREPARE_EXECUTE so this no longer duplicates getLastExecutionTime() (START..EXECUTED); NOTE(review): confirm PREPARE_EXECUTE is the intended end of conversion
+ }
public double getAccumulatedTotalTime() {
KernelDeviceProfile lastDeviceProfile = getLastDeviceProfile();
@@ -38,12 +40,12 @@ public class KernelProfile {
return Double.NaN;
}
else {
- return lastDeviceProfile.getCumulativeElapsedTimeAll();
+ return lastDeviceProfile.getCumulativeElapsedTimeAll() / MILLION;
}
}
- private KernelDeviceProfile getLastDeviceProfile() {
- return null;
+ public KernelDeviceProfile getLastDeviceProfile() {
+ return deviceProfiles.get(currentDevice);
}
void onStart(Device device) {
@@ -61,10 +63,11 @@ public class KernelProfile {
void onEvent(ProfilingEvent event) {
switch (event) {
case CLASS_MODEL_BUILT: // fallthrough
- case OPENCL_GENERATED: // fallthrough
- case OPENCL_COMPILED: // fallthrough
- case PREPARE_EXECUTE: // fallthrough
- case EXECUTED: // fallthrough
+ case OPENCL_GENERATED: // fallthrough
+ case INIT_JNI: // fallthrough
+ case OPENCL_COMPILED: // fallthrough
+ case PREPARE_EXECUTE: // fallthrough
+ case EXECUTED: // fallthrough
{
if (currentDeviceProfile == null) {
logger.log(Level.SEVERE, "Error in KernelProfile, no currentDevice (synchronization error?");
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index f162d695ed5130737b525cdbc707f49b41d56b30..7f250d0fe0ab949e51408df82bb2766ff4f12ec5 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -75,6 +75,10 @@ import java.util.logging.*;
*/
public class KernelRunner extends KernelRunnerJNI{
+ public static boolean BINARY_CACHING_DISABLED = false;
+
+ private static final int MINIMUM_ARRAY_SIZE = 1;
+
/** @see #getCurrentPass() */
@UsedByJNICode public static final int PASS_ID_PREPARING_EXECUTION = -2;
/** @see #getCurrentPass() */
@@ -129,6 +133,7 @@ public class KernelRunner extends KernelRunnerJNI{
private static final ForkJoinPool threadPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors(),
lowPriorityThreadFactory, null, false);
private static HashMap<Class<? extends Kernel>, String> openCLCache = new HashMap<>();
+ private static LinkedHashSet<String> seenBinaryKeys = new LinkedHashSet<>();
/**
* Create a KernelRunner for a specific Kernel instance.
@@ -147,7 +152,34 @@ public class KernelRunner extends KernelRunnerJNI{
inBufferRemoteInt = inBufferRemote.asIntBuffer();
outBufferRemoteInt = outBufferRemote.asIntBuffer();
- KernelManager.instance(); // ensures static initialization of KernalManager
+ KernelManager.instance(); // ensures static initialization of KernelManager
+ }
+
+ /**
+ * @see Kernel#cleanUpArrays()
+ */
+ public void cleanUpArrays() {
+ if (args != null && kernel.isRunningCL()) {
+ for (KernelArg arg : args) {
+ if ((arg.getType() & KernelRunnerJNI.ARG_ARRAY) != 0) {
+ Field field = arg.getField();
+ if (field != null && field.getType().isArray() && !Modifier.isFinal(field.getModifiers())) {
+ field.setAccessible(true);
+ Class<?> componentType = field.getType().getComponentType();
+ Object newValue = Array.newInstance(componentType, MINIMUM_ARRAY_SIZE);
+ try {
+ field.set(kernel, newValue);
+ }
+ catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ }
+ kernel.execute(0);
+ } else if (kernel.isRunningCL()) {
+ logger.log(Level.SEVERE, "KernelRunner#cleanUpArrays() could not execute as no args available (Kernel has not been executed?)");
+ }
}
/**
@@ -156,7 +188,7 @@ public class KernelRunner extends KernelRunnerJNI{
* @see KernelRunnerJNI#disposeJNI(long)
*/
public void dispose() {
- if (kernel.isRunningCL()) {
+ if (args != null || kernel.isRunningCL()) {
disposeJNI(jniContextHandle);
}
// We are using a shared pool, so there's no need no shutdown it when kernel is disposed
@@ -1005,7 +1037,7 @@ public class KernelRunner extends KernelRunnerJNI{
kernel.setFallbackExecutionMode();
}
recreateRange(_settings);
- return executeInternal(_settings);
+ return executeInternalInner(_settings);
}
private void recreateRange(ExecutionSettings _settings) {
@@ -1075,33 +1107,23 @@ public class KernelRunner extends KernelRunnerJNI{
}
recreateRange(_settings);
- return executeInternal(_settings);
- }
-
- private String describeDevice() {
- Device device = KernelManager.instance().getPreferences(kernel).getPreferredDevice(kernel);
- return (device == null) ? "<default fallback>" : device.getShortDescription();
- }
-
- @Override
- public String toString() {
- return "KernelRunner{" + kernel + "}";
+ return executeInternalInner(_settings);
}
@SuppressWarnings("deprecation")
public synchronized Kernel execute(String _entrypoint, final Range _range, final int _passes) {
executing = true;
- clearCancelMultiPass();
- KernelProfile profile = KernelManager.instance().getProfile(kernel.getClass());
- KernelPreferences preferences = KernelManager.instance().getPreferences(kernel);
- boolean legacyExecutionMode = kernel.getExecutionMode() != Kernel.EXECUTION_MODE.AUTO;
-
- ExecutionSettings settings = new ExecutionSettings(preferences, profile, _entrypoint, _range, _passes, legacyExecutionMode);
try {
+ clearCancelMultiPass();
+ KernelProfile profile = KernelManager.instance().getProfile(kernel.getClass());
+ KernelPreferences preferences = KernelManager.instance().getPreferences(kernel);
+ boolean legacyExecutionMode = kernel.getExecutionMode() != Kernel.EXECUTION_MODE.AUTO;
+
+ ExecutionSettings settings = new ExecutionSettings(preferences, profile, _entrypoint, _range, _passes, legacyExecutionMode);
// Two Kernels of the same class share the same KernelPreferences object, and since failure (fallback) generally mutates
// the preferences object, we must lock it. Note this prevents two Kernels of the same class executing simultaneously.
synchronized (preferences) {
- return executeInternal(settings);
+ return executeInternalOuter(settings);
}
} finally {
executing = false;
@@ -1109,8 +1131,18 @@ public class KernelRunner extends KernelRunnerJNI{
}
}
+ private synchronized Kernel executeInternalOuter(ExecutionSettings _settings) { // runs the kernel, then calls cleanUpArrays() if auto clean-up is enabled and the range was not zero-sized
+ try {
+ return executeInternalInner(_settings);
+ } finally {
+ if (kernel.isAutoCleanUpArrays() && _settings.range != null && _settings.range.getGlobalSize_0() != 0) { // range is null when executeInternalInner rejected it; do not mask that exception with an NPE
+ cleanUpArrays();
+ }
+ }
+ }
+
@SuppressWarnings("deprecation")
- private synchronized Kernel executeInternal(ExecutionSettings _settings) {
+ private synchronized Kernel executeInternalInner(ExecutionSettings _settings) {
if (_settings.range == null) {
throw new IllegalStateException("range can't be null");
@@ -1119,7 +1151,7 @@ public class KernelRunner extends KernelRunnerJNI{
EXECUTION_MODE requestedExecutionMode = kernel.getExecutionMode();
if (requestedExecutionMode.isOpenCL() && _settings.range.getDevice() != null && !(_settings.range.getDevice() instanceof OpenCLDevice)) {
- fallBackToNextDevice(_settings, "OpenCL was requested but Device supplied was not an OpenCLDevice");
+ fallBackToNextDevice(_settings, "OpenCL EXECUTION_MODE was requested but Device supplied was not an OpenCLDevice");
}
Device device = _settings.range.getDevice();
@@ -1151,9 +1183,6 @@ public class KernelRunner extends KernelRunnerJNI{
OpenCLDevice openCLDevice = device instanceof OpenCLDevice ? (OpenCLDevice) device : null;
int jniFlags = 0;
- if (_settings.legacyExecutionMode && device != null && !(device instanceof OpenCLDevice)) {
- hashCode();
- }
// for legacy reasons use old logic where Kernel.EXECUTION_MODE is not AUTO
if (_settings.legacyExecutionMode && !userSpecifiedDevice && requestedExecutionMode.isOpenCL()) {
if (requestedExecutionMode.equals(EXECUTION_MODE.GPU)) {
@@ -1214,9 +1243,8 @@ public class KernelRunner extends KernelRunnerJNI{
// jniFlags |= (kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.GPU) ? JNI_FLAG_USE_GPU : 0);
// Init the device to check capabilities before emitting the
// code that requires the capabilities.
-
- // synchronized(Kernel.class){
jniContextHandle = initJNI(kernel, openCLDevice, jniFlags); // openCLDevice will not be null here
+ _settings.profile.onEvent(ProfilingEvent.INIT_JNI);
} // end of synchronized! issue 68
if (jniContextHandle == 0) {
@@ -1282,8 +1310,26 @@ public class KernelRunner extends KernelRunnerJNI{
}
}
- // Send the string to OpenCL to compile it
- long handle = buildProgramJNI(jniContextHandle, openCL);
+ // Send the string to OpenCL to compile it, or if the compiled binary is already cached on JNI side just empty string to use cached binary
+ long handle;
+ if (BINARY_CACHING_DISABLED) {
+ handle = buildProgramJNI(jniContextHandle, openCL, "");
+ } else {
+ synchronized (seenBinaryKeys) {
+ String binaryKey = kernel.getClass().getName() + ":" + device.getDeviceId();
+ if (seenBinaryKeys.contains(binaryKey)) {
+ // use cached binary
+ logger.log(Level.INFO, "reusing cached binary for " + binaryKey);
+ handle = buildProgramJNI(jniContextHandle, "", binaryKey);
+ }
+ else {
+ // create and cache binary; a zero handle means the build failed, so the key is not recorded and the next attempt resends the source
+ logger.log(Level.INFO, "compiling new binary for " + binaryKey);
+ handle = buildProgramJNI(jniContextHandle, openCL, binaryKey);
+ if (handle != 0) { seenBinaryKeys.add(binaryKey); }
+ }
+ }
+ }
_settings.profile.onEvent(ProfilingEvent.OPENCL_COMPILED);
if (handle == 0) {
return fallBackToNextDevice(_settings, "OpenCL compile failed");
@@ -1446,6 +1492,26 @@ public class KernelRunner extends KernelRunnerJNI{
}
finally {
_settings.profile.onEvent(ProfilingEvent.EXECUTED);
+ maybeReportProfile(_settings);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "KernelRunner{" + kernel + "}";
+ }
+
+ private String describeDevice() {
+ Device device = KernelManager.instance().getPreferences(kernel).getPreferredDevice(kernel);
+ return (device == null) ? "<default fallback>" : device.getShortDescription();
+ }
+
+ private void maybeReportProfile(ExecutionSettings _settings) {
+ // getLastDeviceProfile() can return null; this method is called from a finally block, so skip the report rather than throw.
+ KernelDeviceProfile deviceProfile = Config.dumpProfileOnExecution ? _settings.profile.getLastDeviceProfile() : null;
+ if (deviceProfile != null) {
+ String report = KernelDeviceProfile.getTableHeader() + '\n' + deviceProfile.getLastAsTableRow();
+ System.out.println(report);
}
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/ProfilingEvent.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/ProfilingEvent.java
index fcb06bfd38a478b80d899c52566a7d5660c105a0..4e1d01d0a524f3a7b2075891b6bbd877ad6cf3b1 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/ProfilingEvent.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/ProfilingEvent.java
@@ -4,5 +4,5 @@ package com.amd.aparapi.internal.kernel;
* Created by Barney on 02/09/2015.
*/
public enum ProfilingEvent {
- START, CLASS_MODEL_BUILT, OPENCL_GENERATED, OPENCL_COMPILED, PREPARE_EXECUTE, EXECUTED
+ START, CLASS_MODEL_BUILT, INIT_JNI, OPENCL_GENERATED, OPENCL_COMPILED, PREPARE_EXECUTE, EXECUTED
}
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
index 132f4f21ae49d9371e19914bfc03805f5aceb880..d3db6a62c37b620384f16609afbeed6b2692ce7f 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
@@ -65,9 +65,9 @@ import java.util.logging.*;
* @author gfrost
*
*/
-public class ClassModel{
+public class ClassModel {
- public interface LocalVariableInfo{
+ public interface LocalVariableInfo {
int getStart();
@@ -142,6 +142,7 @@ public class ClassModel{
});
// private ValueCache<String, Integer, ClassParseException> privateMemorySizes = ValueCache.on(this::computePrivateMemorySize);
+
private ValueCache<String, Integer, ClassParseException> privateMemorySizes = ValueCache
.on(new ThrowingValueComputer<String, Integer, ClassParseException>(){
@Override
@@ -635,19 +636,25 @@ public class ClassModel{
return (methodDescription);
}
- // private static final ValueCache<Class<?>, ClassModel, ClassParseException> classModelCache = ValueCache.onIdentity(ClassModel::new);
private static final ValueCache<Class<?>, ClassModel, ClassParseException> classModelCache = ValueCache
.on(new ThrowingValueComputer<Class<?>, ClassModel, ClassParseException>(){
@Override
public ClassModel compute(Class<?> key) throws ClassParseException {
- return new ClassModel(key);
+ return createClassModelInternal(key);
}
});
+ private static ClassModel createClassModelInternal(Class<?> key) throws ClassParseException { // single construction point used by both classModelCache and the uncached path of createClassModel
+ ClassModel classModel = new ClassModel(key);
+ return classModel;
+ }
+
public static ClassModel createClassModel(Class<?> _class) throws ClassParseException {
- if (CacheEnabler.areCachesEnabled())
+ if (CacheEnabler.areCachesEnabled()) {
return classModelCache.computeIfAbsent(_class);
- return new ClassModel(_class);
+ }
+
+ return createClassModelInternal(_class);
}
private int magic;
@@ -746,7 +753,7 @@ public class ClassModel{
private final List<Entry> entries = new ArrayList<Entry>();
- public abstract class Entry{
+ public abstract class Entry {
private final ConstantPoolType constantPoolType;
private final int slot;
@@ -1559,7 +1566,7 @@ public class ClassModel{
}
}
- public class AttributePool{
+ public class AttributePool {
private final List<AttributePoolEntry> attributePoolEntries = new ArrayList<AttributePoolEntry>();
public class CodeEntry extends AttributePoolEntry{
@@ -1672,7 +1679,7 @@ public class ClassModel{
}
}
- public abstract class AttributePoolEntry{
+ public abstract class AttributePoolEntry {
protected int length;
protected int nameIndex;
@@ -1727,7 +1734,7 @@ public class ClassModel{
}
public class InnerClassesEntry extends PoolEntry<InnerClassesEntry.InnerClassInfo>{
- public class InnerClassInfo{
+ public class InnerClassInfo {
private final int innerAccess;
private final int innerIndex;
@@ -1771,7 +1778,7 @@ public class ClassModel{
public class LineNumberTableEntry extends PoolEntry<LineNumberTableEntry.StartLineNumberPair>{
- public class StartLineNumberPair{
+ public class StartLineNumberPair {
private final int lineNumber;
private final int start;
@@ -2090,13 +2097,13 @@ public class ClassModel{
public class RuntimeAnnotationsEntry extends PoolEntry<RuntimeAnnotationsEntry.AnnotationInfo>{
- public class AnnotationInfo{
+ public class AnnotationInfo {
private final int typeIndex;
private final int elementValuePairCount;
public class ElementValuePair{
- class Value{
+ class Value {
Value(int _tag) {
tag = _tag;
}
@@ -2383,7 +2390,7 @@ public class ClassModel{
private static ClassLoader classModelLoader = ClassModel.class.getClassLoader();
- public class ClassModelField{
+ public class ClassModelField {
private final int fieldAccessFlags;
AttributePool fieldAttributePool;
@@ -2450,7 +2457,7 @@ public class ClassModel{
}
}
- public class ClassModelMethod{
+ public class ClassModelMethod {
private final int methodAccessFlags;
@@ -2554,7 +2561,7 @@ public class ClassModel{
}
}
- public class ClassModelInterface{
+ public class ClassModelInterface {
private final int interfaceIndex;
ClassModelInterface(ByteReader _byteReader) {
@@ -2805,7 +2812,10 @@ public class ClassModel{
Entrypoint getEntrypoint(String _entrypointName, String _descriptor, Object _k) throws AparapiException {
if (CacheEnabler.areCachesEnabled()) {
EntrypointKey key = EntrypointKey.of(_entrypointName, _descriptor);
+ long s = System.nanoTime();
Entrypoint entrypointWithoutKernel = entrypointCache.computeIfAbsent(key);
+ long e = System.nanoTime() - s;
+ System.out.println("newMethodModel: " + e / 1000000f);
return entrypointWithoutKernel.cloneForKernel(_k);
} else {
final MethodModel method = getMethodModel(_entrypointName, _descriptor);
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Memoizer.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Memoizer.java
index ece7e391574fb962f7f28d06e876e97693b2d970..7eec09b7e7a08a606d44712a714c86b6ab064fe8 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Memoizer.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Memoizer.java
@@ -1,7 +1,7 @@
package com.amd.aparapi.internal.model;
-import java.util.NoSuchElementException;
-import java.util.concurrent.atomic.AtomicReference;
+import java.util.*;
+import java.util.concurrent.atomic.*;
interface Optional<E> {
final class Some<E> implements Optional<E>{
@@ -49,7 +49,7 @@ interface Optional<E> {
boolean isPresent();
}
-public interface Memoizer<T> extends Supplier<T>{
+public interface Memoizer<T> extends Supplier<T> {
public final class Impl<T> implements Memoizer<T>{
private final Supplier<T> supplier;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ValueCache.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ValueCache.java
index ef66a53fdeca66f8da816f12f1d88e360d749303..63906ed0465b9d95150dc3923f05552d9aacaa90 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ValueCache.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ValueCache.java
@@ -1,9 +1,7 @@
package com.amd.aparapi.internal.model;
-import java.lang.ref.Reference;
-import java.lang.ref.SoftReference;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentMap;
+import java.lang.ref.*;
+import java.util.concurrent.*;
//import java.util.function.Supplier;
@@ -14,7 +12,7 @@ public final class ValueCache<K, V, T extends Throwable> {
}
// @FunctionalInterface
- public interface ValueComputer<K, V> extends ThrowingValueComputer<K, V, RuntimeException>{
+ public interface ValueComputer<K, V> extends ThrowingValueComputer<K, V, RuntimeException> {
// Marker interface
}
diff --git a/samples/blackscholes/src/com/amd/aparapi/sample/blackscholes/Main.java b/samples/blackscholes/src/com/amd/aparapi/sample/blackscholes/Main.java
index bdc1486254513bbc07b61b1cb84e3313debca650..074ed2b013182be33f73aa4325d463c256aecbd0 100644
--- a/samples/blackscholes/src/com/amd/aparapi/sample/blackscholes/Main.java
+++ b/samples/blackscholes/src/com/amd/aparapi/sample/blackscholes/Main.java
@@ -76,7 +76,6 @@ public class Main{
/**
* @brief Abromowitz Stegun approxmimation for PHI (Cumulative Normal Distribution Function)
* @param X input value
- * @param phi pointer to store calculated CND of X
*/
float phi(float X) {
final float c1 = 0.319381530f;
@@ -183,18 +182,15 @@ public class Main{
int size = Integer.getInteger("size", 512);
Range range = Range.create(size);
- int iterations = Integer.getInteger("iterations", 5);
+ int iterations = Integer.getInteger("iterations", 20);
System.out.println("size =" + size);
System.out.println("iterations =" + iterations);
BlackScholesKernel kernel = new BlackScholesKernel(size);
- long totalExecTime = 0;
- long iterExecTime = 0;
- /*
for (int i = 0; i < iterations; i++) {
- iterExecTime = kernel.execute(size).getExecutionTime();
- totalExecTime += iterExecTime;
- }*/
+ kernel.execute(size).getExecutionTime();
+ }
+
kernel.execute(range, iterations);
System.out.println("Average execution time " + kernel.getAccumulatedExecutionTime() / iterations);
kernel.showResults(10);
diff --git a/samples/configuration/src/com/amd/aparapi/sample/configuration/AutoCleanUpArraysDemo.java b/samples/configuration/src/com/amd/aparapi/sample/configuration/AutoCleanUpArraysDemo.java
new file mode 100644
index 0000000000000000000000000000000000000000..c09d0ab218bec2c0a303a77517890397ede4b2d5
--- /dev/null
+++ b/samples/configuration/src/com/amd/aparapi/sample/configuration/AutoCleanUpArraysDemo.java
@@ -0,0 +1,20 @@
+package com.amd.aparapi.sample.configuration;
+
+import com.amd.aparapi.sample.mandel.*;
+/** Sample: runs the Mandelbrot kernel twice with setAutoCleanUpArrays(true), printing the length of its rgb array after each run. */
+public class AutoCleanUpArraysDemo {
+ public static void main(String[] ignored) {
+ // NOTE(review): presumably makes Aparapi print a profile after every execution - confirm where this property is read.
+ System.setProperty("com.amd.aparapi.dumpProfileOnExecution", "true");
+ // The buffer holds one int per pixel of a size x size image.
+ int size = 1024;
+ int[] rgbs = new int[size * size];
+ Main.MandelKernel kernel = new Main.MandelKernel(size, size, rgbs);
+ kernel.setAutoCleanUpArrays(true); // NOTE(review): the exact clean-up semantics are defined in Kernel, not shown here
+ kernel.execute(size * size);
+ System.out.println("length = " + kernel.getRgbs().length);
+ kernel.resetImage(size, size, rgbs); // give the kernel the same dimensions and buffer again before re-running
+ kernel.execute(size * size);
+ System.out.println("length = " + kernel.getRgbs().length);
+ }
+}
diff --git a/samples/configuration/src/com/amd/aparapi/sample/configuration/CleanUpArraysDemo.java b/samples/configuration/src/com/amd/aparapi/sample/configuration/CleanUpArraysDemo.java
new file mode 100644
index 0000000000000000000000000000000000000000..26d832f4b2d1db2be339c6933fff405d642b6a7c
--- /dev/null
+++ b/samples/configuration/src/com/amd/aparapi/sample/configuration/CleanUpArraysDemo.java
@@ -0,0 +1,25 @@
+package com.amd.aparapi.sample.configuration;
+
+import com.amd.aparapi.sample.mandel.*;
+/** Sample: runs the Mandelbrot kernel, calls cleanUpArrays() explicitly, then resets the image and runs again, printing the rgb array length at each step. */
+public class CleanUpArraysDemo {
+ public static void main(String[] ignored) {
+ // Switch on Aparapi's diagnostic properties (names as set below) before any kernel is created.
+ System.setProperty("com.amd.aparapi.enableVerboseJNI", "true");
+ System.setProperty("com.amd.aparapi.enableVerboseJNIOpenCLResourceTracking", "true");
+ System.setProperty("com.amd.aparapi.enableExecutionModeReporting", "true");
+ System.setProperty("com.amd.aparapi.dumpProfileOnExecution", "true");
+ // The buffer holds one int per pixel of a size x size image.
+ int size = 1024;
+ int[] rgbs = new int[size * size];
+ Main.MandelKernel kernel = new Main.MandelKernel(size, size, rgbs);
+ kernel.execute(size * size);
+ System.out.println("length = " + kernel.getRgbs().length);
+ System.out.println("Cleaning up arrays");
+ kernel.cleanUpArrays(); // NOTE(review): the effect on the kernel's rgb field is defined in Kernel, not shown here; the next line prints the resulting length
+ System.out.println("length = " + kernel.getRgbs().length);
+ kernel.resetImage(size, size, rgbs); // give the kernel the same dimensions and buffer again before re-running
+ kernel.execute(size * size);
+ System.out.println("length = " + kernel.getRgbs().length);
+ }
+}
diff --git a/samples/configuration/src/com/amd/aparapi/sample/configuration/ConfigurationDemo.java b/samples/configuration/src/com/amd/aparapi/sample/configuration/ConfigurationDemo.java
index bdfb3cf21d21ad26b50772f56c91d231cc7e352e..67d7cc0296b1432303e80ab8fd39fec477f64891 100644
--- a/samples/configuration/src/com/amd/aparapi/sample/configuration/ConfigurationDemo.java
+++ b/samples/configuration/src/com/amd/aparapi/sample/configuration/ConfigurationDemo.java
@@ -10,16 +10,14 @@ import java.util.*;
*/
public class ConfigurationDemo {
public static void main(String[] ignored) {
- System.setProperty("com.amd.aparapi.dumpProfilesOnExit", "true");
-
StringBuilder report;
List<Integer> tests = Arrays.asList(0, 1, 2, 3);
- int reps = 300;
+ int reps = 1;
for (int rep = 0; rep < reps; ++rep) {
runTests(rep == 0, tests);
- if (rep % 100 == 99 || rep == 0) {
+ if (rep % 100 == 99 || rep == 0 || rep == reps - 1) {
report = new StringBuilder("rep = " + rep + "\n");
KernelManager.instance().reportDeviceUsage(report, true);
System.out.println(report);
diff --git a/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemo.java b/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemo.java
new file mode 100644
index 0000000000000000000000000000000000000000..aeea4ea5888c4bcf13b0dddf5fcad7cb05038edc
--- /dev/null
+++ b/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemo.java
@@ -0,0 +1,83 @@
+package com.amd.aparapi.sample.configuration;
+
+import com.amd.aparapi.*;
+import com.amd.aparapi.internal.kernel.*;
+import com.amd.aparapi.sample.blackscholes.Main.*;
+import com.amd.aparapi.sample.mandel.*;
+
+/**
+ * Demonstrates the enhanced profiling capability (KernelManager#getProfile, KernelDeviceProfile#getLastAsTableRow) by profiling the kernel from the blackscholes sample: a first run, repeated runs on the same kernel instance, then runs on newly created kernels.
+ */
+public class ProfilingDemo {
+
+ private static BlackScholesKernel kernel;
+
+ public static void main(String[] ignored) {
+
+ final int size = 1024;
+ newBlackScholesKernel(size);
+
+ // First execute an arbitrary Kernel (not the one we are profiling!) a few times so that class loading and initial JIT optimisations have
+ // already been performed before we start profiling.
+ int warmups = 5;
+ for (int i = 0; i < warmups; ++i) {
+ runWarmup();
+ }
+
+ String tableHeader = KernelDeviceProfile.getTableHeader();
+
+ boolean newKernel = false;
+
+ runOnce(size, newKernel);
+ System.out.println("First run:");
+ printLastProfile(tableHeader);
+
+
+ int reps = 20;
+
+ System.out.println("\nSubsequent runs using same kernel:");
+ for (int rep = 0; rep < reps; ++rep) {
+ runOnce(size, newKernel);
+ printLastProfile(tableHeader);
+ }
+
+ newKernel = true;
+ System.out.println("\nSubsequent runs using new kernels:");
+ for (int rep = 0; rep < reps; ++rep) {
+ runOnce(size, newKernel);
+ printLastProfile(tableHeader);
+ }
+
+ // Note: the output shows a substantial cost to Kernel creation (vs Kernel reuse), almost entirely due to KernelRunner#initJNI.
+
+ }
+ /** Prints tableHeader followed by the last-run table row of the last device profile recorded for BlackScholesKernel. */
+ private static void printLastProfile(String tableHeader) {
+ KernelProfile profile = KernelManager.instance().getProfile(BlackScholesKernel.class);
+ KernelDeviceProfile deviceProfile = profile.getLastDeviceProfile();
+ String row = deviceProfile.getLastAsTableRow();
+ System.out.println(tableHeader);
+ System.out.println(row);
+ }
+ /** Calls kernel.execute(size) on the shared kernel, first replacing it with a new instance when newKernel is true. */
+ private static void runOnce(int size, boolean newKernel) {
+ if (newKernel) {
+ newBlackScholesKernel(size);
+ }
+ kernel.execute(size);
+ }
+ /** Executes a throwaway 512x512 MandelKernel once. NOTE(review): the warm-up kernel is never disposed - confirm that is intended. */
+ private static void runWarmup() {
+ int[] rgb = new int[512 * 512];
+ Kernel warmupKernel = new Main.MandelKernel(512, 512, rgb);
+ warmupKernel.execute(512 * 512);
+ }
+ /** Disposes the current kernel (if any), requests a GC, and stores a new BlackScholesKernel of the given size in the static field. */
+ private static void newBlackScholesKernel(int size) {
+ if (kernel != null) {
+ kernel.dispose();
+ }
+ System.gc();
+ kernel = new BlackScholesKernel(size);
+ }
+}
diff --git a/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemoNoBinaryCaching.java b/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemoNoBinaryCaching.java
new file mode 100644
index 0000000000000000000000000000000000000000..2f3252c68cf63a67e21b2c2603ea1b5d333a25de
--- /dev/null
+++ b/samples/configuration/src/com/amd/aparapi/sample/configuration/ProfilingDemoNoBinaryCaching.java
@@ -0,0 +1,14 @@
+package com.amd.aparapi.sample.configuration;
+
+import com.amd.aparapi.internal.kernel.*;
+
+/**
+ * Runs {@link ProfilingDemo} after setting KernelRunner.BINARY_CACHING_DISABLED to true. Created by Barney on 13/09/2015.
+ */
+public class ProfilingDemoNoBinaryCaching {
+ // Entry point; the command-line arguments are not used.
+ public static void main(String[] ignored) {
+ KernelRunner.BINARY_CACHING_DISABLED = true; // set before ProfilingDemo runs (NOTE(review): confirm when KernelRunner reads this flag)
+ ProfilingDemo.main(null);
+ }
+}
diff --git a/samples/convolution/src/com/amd/aparapi/sample/convolution/Convolution.java b/samples/convolution/src/com/amd/aparapi/sample/convolution/Convolution.java
index fc70267efa87412914e40580fce6ffb3f9fb66c9..597317a6af365eefe16ab223f5e2b4d0c1164261 100644
--- a/samples/convolution/src/com/amd/aparapi/sample/convolution/Convolution.java
+++ b/samples/convolution/src/com/amd/aparapi/sample/convolution/Convolution.java
@@ -38,15 +38,15 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
package com.amd.aparapi.sample.convolution;
-import java.io.File;
+import com.amd.aparapi.*;
-import com.amd.aparapi.Kernel;
+import java.io.*;
public class Convolution {
- public static void main(final String[] _args) {
+ public static void main(final String[] _args) throws IOException {
- final File file = new File(_args.length == 1 ? _args[0] : "testcard.jpg");
+ final File file = new File(_args.length == 1 ? _args[0] : "./samples/convolution/testcard.jpg").getCanonicalFile();
final ImageConvolution convolution = new ImageConvolution();
diff --git a/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java b/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java
index 13de958505466f8a17ce3af2cbe84f3481d130f8..d527917a74d531e9ff11423126c708fb317dc956 100644
--- a/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java
+++ b/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java
@@ -60,6 +60,16 @@ import java.util.List;
public class Main{
+ static {
+ System.setProperty("com.amd.aparapi.dumpProfilesOnExit", "true");
+// KernelManager.setKernelManager(new KernelManager() {
+// @Override
+// protected List<Device.TYPE> getPreferredDeviceTypes() {
+// return Collections.singletonList(Device.TYPE.CPU);
+// }
+// });
+ }
+
/**
* An Aparapi Kernel implementation for creating a scaled view of the mandelbrot set.
*
@@ -70,13 +80,13 @@ public class Main{
public static class MandelKernel extends Kernel{
/** RGB buffer used to store the Mandelbrot image. This buffer holds (width * height) RGB values. */
- final private int rgb[];
+ private int[] rgb;
/** Mandelbrot image width. */
- final private int width;
+ private int width;
/** Mandelbrot image height. */
- final private int height;
+ private int height;
/** Maximum iterations for Mandelbrot. */
final private int maxIterations = 64;
@@ -112,6 +122,12 @@ public class Main{
}
+ public void resetImage(int _width, int _height, int[] _rgb) { // replaces the image dimensions and the RGB buffer held by this kernel
+ width = _width;
+ height = _height;
+ rgb = _rgb;
+ }
+
public int getCount(float x, float y) {
int count = 0;
@@ -152,6 +168,9 @@ public class Main{
scale = _scale;
}
+ public int[] getRgbs() { // returns the kernel's current rgb array itself, not a copy
+ return rgb;
+ }
}
/** User selected zoom-in point on the Mandelbrot view. */
diff --git a/samples/mandel/src/com/amd/aparapi/sample/mandel/Main2D.java b/samples/mandel/src/com/amd/aparapi/sample/mandel/Main2D.java
index 8a1b7faa68eceb14aeae40c133bf2d6f57303bd0..5bdd9805077801c6620d6c7719a9ef8a7957da50 100644
--- a/samples/mandel/src/com/amd/aparapi/sample/mandel/Main2D.java
+++ b/samples/mandel/src/com/amd/aparapi/sample/mandel/Main2D.java
@@ -143,6 +143,7 @@ public class Main2D{
@SuppressWarnings("serial") public static void main(String[] _args) {
+
final JFrame frame = new JFrame("MandelBrot");
/** Mandelbrot image height. */
diff --git a/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java b/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
index f4e3e28c5d7e748613d067ba6e76dbe018429b6e..b94c359d1f03037bb6828bcb7d10751fcb489b4e 100644
--- a/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
+++ b/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
@@ -1,6 +1,5 @@
package com.amd.aparapi.sample.median;
-import com.amd.aparapi.device.*;
import com.amd.aparapi.internal.kernel.*;
import javax.imageio.*;
@@ -8,7 +7,6 @@ import javax.swing.*;
import java.awt.*;
import java.awt.image.*;
import java.io.*;
-import java.util.*;
/**
* Demonstrate use of __private namespaces and @NoCL annotations.
@@ -27,12 +25,10 @@ public class MedianDemo {
}
}
- private static final boolean TEST_JTP = true;
-
public static void main(String[] ignored) {
final int size = 5;
- System.setProperty("com.amd.aparapi.enableShowGeneratedOpenCL", "true");
- boolean verbose = true;
+ System.setProperty("com.amd.aparapi.dumpProfilesOnExit", "true");
+ boolean verbose = false;
if (verbose)
{
System.setProperty("com.amd.aparapi.enableVerboseJNI", "true");
@@ -42,18 +38,22 @@ public class MedianDemo {
System.setProperty("com.amd.aparapi.enableExecutionModeReporting", "true");
}
- if (TEST_JTP) {
- LinkedHashSet<Device> devices = new LinkedHashSet<>(Collections.singleton(JavaDevice.THREAD_POOL));
- KernelManager.instance().setDefaultPreferredDevices(devices);
- }
+// KernelManager.setKernelManager(new KernelManager(){
+// @Override
+// protected Comparator<OpenCLDevice> getDefaultGPUComparator() {
+// return new Comparator<OpenCLDevice>() {
+// @Override
+// public int compare(OpenCLDevice o1, OpenCLDevice o2) {
+// return o2.getMaxComputeUnits() - o1.getMaxComputeUnits();
+// }
+// };
+// }
+// });
+
+ System.out.println(KernelManager.instance().bestDevice());
int[] argbs = testImage.getRGB(0, 0, testImage.getWidth(), testImage.getHeight(), null, 0, testImage.getWidth());
- MedianKernel7x7 kernel = new MedianKernel7x7();
- kernel._imageTypeOrdinal = MedianKernel7x7.RGB;
- kernel._sourceWidth = testImage.getWidth();
- kernel._sourceHeight = testImage.getHeight();
- kernel._sourcePixels = argbs;
- kernel._destPixels = new int[argbs.length];
+ MedianKernel7x7 kernel = createMedianKernel(argbs);
kernel.processImages(new MedianSettings(size));
BufferedImage out = new BufferedImage(testImage.getWidth(), testImage.getHeight(), BufferedImage.TYPE_INT_RGB);
@@ -71,12 +71,35 @@ public class MedianDemo {
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
frame.setVisible(true);
- int reps = 20;
+ StringBuilder builder = new StringBuilder();
+ KernelManager.instance().reportDeviceUsage(builder, true);
+ System.out.println(builder);
+
+ int reps = 50;
+ final boolean newKernel = false;
for (int rep = 0; rep < reps; ++rep) {
+ if (newKernel) {
+ kernel.dispose();
+ kernel = createMedianKernel(argbs);
+ }
long start = System.nanoTime();
kernel.processImages(new MedianSettings(size));
long elapsed = System.nanoTime() - start;
System.out.println("elapsed = " + elapsed / 1000000f + "ms");
}
+
+ builder = new StringBuilder();
+ KernelManager.instance().reportDeviceUsage(builder, true);
+ System.out.println(builder);
+ }
+ /** Builds a MedianKernel7x7 set to RGB with testImage's width and height, using argbs as source pixels and a new array of the same length as destination. */
+ private static MedianKernel7x7 createMedianKernel(int[] argbs) {
+ MedianKernel7x7 kernel = new MedianKernel7x7();
+ kernel._imageTypeOrdinal = MedianKernel7x7.RGB;
+ kernel._sourceWidth = testImage.getWidth();
+ kernel._sourceHeight = testImage.getHeight();
+ kernel._sourcePixels = argbs;
+ kernel._destPixels = new int[argbs.length];
+ return kernel;
}
}