diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bd859387edb83bd6ba9d4a2c4a0c5c2c7f797b..981c2e29a474a70324ef3b9292de7f722e967189 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 1.8.0 * Updated KernelManager to facilitate class extensions having constructors with non static parameters +* Enable kernel profiling and execution simultaneously on multiple devices (multiple threads calling same kernel class on multiple devices) ## 1.7.0 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f0959e8c337122f4c6b9d2634b7025db014a94f2..84040ab351b91d7d553733e267789bf4fcec7a37 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -48,4 +48,5 @@ Below are some of the specific details of various contributions. * Luis Mendes submited PR for issue #84 - Fully support OpenCL 1.2 barrier() - localBarrier(), globalBarrier() and localGlobalBarrier() * Luis Mendes with suggestions by Automenta submited PR for issue #62 and implemented new thread-safe API for Kernel profiling * Luis Mendes submited PR for issue #101 - Possible deadlock in JTP mode -* Luis Mendes submited PR to facilitate KernelManager class extension with non-static parameters in constructors \ No newline at end of file +* Luis Mendes submited PR to facilitate KernelManager class extension with non-static parameters in constructors +* Luis Mendes submitted PR to enable kernel profiling and execution simultaneously on multiple devices \ No newline at end of file diff --git a/pom.xml b/pom.xml index 1d1d5150d7ce15d391baa9b6e28fb806272654bf..d7c77ba03ac425038350bec032bb9826910b0422 100644 --- a/pom.xml +++ b/pom.xml @@ -87,7 +87,7 @@ <dependency> <groupId>com.aparapi</groupId> <artifactId>aparapi-jni</artifactId> - <version>1.1.2</version> + <version>1.2.0</version> </dependency> <dependency> <groupId>junit</groupId> @@ -137,6 +137,9 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <skipTests>true</skipTests> + </configuration> 
</plugin> <plugin> <groupId>org.jacoco</groupId> diff --git a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java index 6803d8850b4ee7792422091e6c1a2f8424001240..77d1d669fa5235f1ad15d30eaaa10bfda0c6f2e7 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java @@ -19,23 +19,22 @@ import com.aparapi.*; import com.aparapi.device.*; import java.util.*; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicReference; import java.util.logging.*; /** - * Collects profiling information per kernel class per device. Not thread safe, it is necessary for client code to correctly synchronize on - * objects of this class. + * Collects profiling information per kernel class per device. */ public class KernelProfile { public static final double MILLION = 1000000d; private static Logger logger = Logger.getLogger(Config.getLoggerName()); private final Class<? extends Kernel> kernelClass; - private LinkedHashMap<Device, KernelDeviceProfile> deviceProfiles = new LinkedHashMap<>(); - private Device currentDevice; - private Device lastDevice; - private KernelDeviceProfile currentDeviceProfile; + private ConcurrentSkipListMap<Device, KernelDeviceProfile> deviceProfiles = new ConcurrentSkipListMap<>(); + private final AtomicReference<Device> currentDevice = new AtomicReference<Device>(null); private IProfileReportObserver observer; - + public KernelProfile(Class<? extends Kernel> _kernelClass) { kernelClass = _kernelClass; } @@ -60,24 +59,44 @@ public class KernelProfile { } } + /** + * Retrieves the last device profile that was updated by the last thread that made + * a profiling information update, when executing this kernel on the specified device. 
+ * @return the device profile + */ public KernelDeviceProfile getLastDeviceProfile() { - return deviceProfiles.get(currentDevice); + return deviceProfiles.get(currentDevice.get()); } + /** + * Starts a profiling information gathering sequence for the current thread invoking this method + * regarding the specified execution device. + * @param device + */ void onStart(Device device) { - synchronized (deviceProfiles) { - currentDeviceProfile = deviceProfiles.get(device); - if (currentDeviceProfile == null) { - currentDeviceProfile = new KernelDeviceProfile(this, kernelClass, device); - deviceProfiles.put(device, currentDeviceProfile); - } + KernelDeviceProfile currentDeviceProfile = deviceProfiles.get(device); + if (currentDeviceProfile == null) { + currentDeviceProfile = new KernelDeviceProfile(this, kernelClass, device); + deviceProfiles.put(device, currentDeviceProfile); } currentDeviceProfile.onEvent(ProfilingEvent.START); - currentDevice = device; + currentDevice.set(device); } - void onEvent(ProfilingEvent event) { + /** + * Updates the profiling information for the current thread invoking this method regarding + * the specified execution device. 
+ * + * @param device the device where the kernel is/was executed + * @param event the event for which the profiling information is being updated + */ + void onEvent(Device device, ProfilingEvent event) { + if (event == null) { + logger.log(Level.WARNING, "Discarding profiling event " + event + " for null device, for Kernel class: " + kernelClass.getName()); + return; + } + final KernelDeviceProfile deviceProfile = deviceProfiles.get(device); switch (event) { case CLASS_MODEL_BUILT: // fallthrough case OPENCL_GENERATED: // fallthrough @@ -86,10 +105,10 @@ public class KernelProfile { case PREPARE_EXECUTE: // fallthrough case EXECUTED: // fallthrough { - if (currentDeviceProfile == null) { + if (deviceProfile == null) { logger.log(Level.SEVERE, "Error in KernelProfile, no currentDevice (synchronization error?"); } - currentDeviceProfile.onEvent(event); + deviceProfile.onEvent(event); break; } case START: @@ -99,16 +118,6 @@ public class KernelProfile { } } - void onFinishedExecution() { - reset(); - } - - private void reset() { - lastDevice = currentDevice; - currentDevice = null; - currentDeviceProfile = null; - } - public Collection<Device> getDevices() { return deviceProfiles.keySet(); } diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java index bc324d17f78fe902bc5e801ba4ccb5d8319b18a3..fdb45e3eef78ea073d1cc5a64972c41b654ae902 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java @@ -366,7 +366,7 @@ public class KernelRunner extends KernelRunnerJNI{ boolean legacySequentialMode = kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.SEQ); passId = PASS_ID_PREPARING_EXECUTION; - _settings.profile.onEvent(ProfilingEvent.PREPARE_EXECUTE); + _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE); try { if (device == JavaDevice.ALTERNATIVE_ALGORITHM) { @@ -376,7 +376,7 @@ public class 
KernelRunner extends KernelRunnerJNI{ } } else { boolean silently = true; // not having an alternative algorithm is the normal state, and does not need reporting - fallBackToNextDevice(_settings, (Exception) null, silently); + fallBackToNextDevice(device, _settings, (Exception) null, silently); } } else { final int localSize0 = _settings.range.getLocalSize(0); @@ -1214,7 +1214,7 @@ public class KernelRunner extends KernelRunnerJNI{ } @SuppressWarnings("deprecation") - private Kernel executeOpenCL(ExecutionSettings _settings) throws AparapiException { + private Kernel executeOpenCL(Device device, ExecutionSettings _settings) throws AparapiException { // Read the array refs after kernel may have changed them // We need to do this as input to computing the localSize @@ -1228,7 +1228,7 @@ public class KernelRunner extends KernelRunnerJNI{ int returnValue = runKernelJNI(jniContextHandle, _settings.range, needSync, _settings.passes, inBufferRemote, outBufferRemote); if (returnValue != 0) { String reason = "OpenCL execution seems to have failed (runKernelJNI returned " + returnValue + ")"; - return fallBackToNextDevice(_settings, new AparapiException(reason)); + return fallBackToNextDevice(device, _settings, new AparapiException(reason)); } if (usesOopConversion == true) { @@ -1282,19 +1282,19 @@ public class KernelRunner extends KernelRunnerJNI{ } } - private Kernel fallBackToNextDevice(ExecutionSettings _settings, String _reason) { - return fallBackToNextDevice(_settings, new AparapiException(_reason)); + private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, String _reason) { + return fallBackToNextDevice(device, _settings, new AparapiException(_reason)); } @SuppressWarnings("deprecation") - synchronized private Kernel fallBackToNextDevice(ExecutionSettings _settings, Exception _exception) { - return fallBackToNextDevice(_settings, _exception, false); + synchronized private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, 
Exception _exception) { + return fallBackToNextDevice(device, _settings, _exception, false); } @SuppressWarnings("deprecation") - synchronized private Kernel fallBackToNextDevice(ExecutionSettings _settings, Exception _exception, boolean _silently) { + synchronized private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, Exception _exception, boolean _silently) { isFallBack = true; - _settings.profile.onEvent(ProfilingEvent.EXECUTED); + _settings.profile.onEvent(device, ProfilingEvent.EXECUTED); if (_settings.legacyExecutionMode) { if (!_silently && logger.isLoggable(Level.WARNING)) { logger.warning("Execution mode " + kernel.getExecutionMode() + " failed for " + kernel + ": " + _exception.getMessage()); @@ -1368,7 +1368,7 @@ public class KernelRunner extends KernelRunnerJNI{ EXECUTION_MODE requestedExecutionMode = kernel.getExecutionMode(); if (requestedExecutionMode.isOpenCL() && _settings.range.getDevice() != null && !(_settings.range.getDevice() instanceof OpenCLDevice)) { - fallBackToNextDevice(_settings, "OpenCL EXECUTION_MODE was requested but Device supplied was not an OpenCLDevice"); + fallBackToNextDevice(_settings.range.getDevice(), _settings, "OpenCL EXECUTION_MODE was requested but Device supplied was not an OpenCLDevice"); } Device device = _settings.range.getDevice(); @@ -1407,20 +1407,20 @@ public class KernelRunner extends KernelRunnerJNI{ openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.bestGPU(); jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "GPU request can't be honored, no GPU device"); + return fallBackToNextDevice(null, _settings, "GPU request can't be honored, no GPU device"); } } else if (requestedExecutionMode.equals(EXECUTION_MODE.ACC)) { // Get the best ACC openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.bestACC(); jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. 
if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "ACC request can't be honored, no ACC device"); + return fallBackToNextDevice(null, _settings, "ACC request can't be honored, no ACC device"); } } else { // We fetch the first CPU device openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.firstDevice(Device.TYPE.CPU); if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "CPU request can't be honored, no CPU device"); + return fallBackToNextDevice(null, _settings, "CPU request can't be honored, no CPU device"); } } } else { @@ -1443,10 +1443,10 @@ public class KernelRunner extends KernelRunnerJNI{ try { final ClassModel classModel = ClassModel.createClassModel(kernel.getClass()); entryPoint = classModel.getEntrypoint(_settings.entrypoint, kernel); - _settings.profile.onEvent(ProfilingEvent.CLASS_MODEL_BUILT); + _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT); } catch (final Exception exception) { - _settings.profile.onEvent(ProfilingEvent.CLASS_MODEL_BUILT); - return fallBackToNextDevice(_settings, exception); + _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT); + return fallBackToNextDevice(device, _settings, exception); } } @@ -1461,11 +1461,11 @@ public class KernelRunner extends KernelRunnerJNI{ // Init the device to check capabilities before emitting the // code that requires the capabilities. jniContextHandle = initJNI(kernel, openCLDevice, jniFlags); // openCLDevice will not be null here - _settings.profile.onEvent(ProfilingEvent.INIT_JNI); + _settings.profile.onEvent(device, ProfilingEvent.INIT_JNI); } // end of synchronized! 
issue 68 if (jniContextHandle == 0) { - return fallBackToNextDevice(_settings, "initJNI failed to return a valid handle"); + return fallBackToNextDevice(device, _settings, "initJNI failed to return a valid handle"); } final String extensions = getExtensionsJNI(jniContextHandle); @@ -1481,11 +1481,11 @@ public class KernelRunner extends KernelRunnerJNI{ } if (entryPoint.requiresDoublePragma() && !hasFP64Support()) { - return fallBackToNextDevice(_settings, "FP64 required but not supported"); + return fallBackToNextDevice(device, _settings, "FP64 required but not supported"); } if (entryPoint.requiresByteAddressableStorePragma() && !hasByteAddressableStoreSupport()) { - return fallBackToNextDevice(_settings, "Byte addressable stores required but not supported"); + return fallBackToNextDevice(device, _settings, "Byte addressable stores required but not supported"); } final boolean all32AtomicsAvailable = hasGlobalInt32BaseAtomicsSupport() @@ -1494,7 +1494,7 @@ public class KernelRunner extends KernelRunnerJNI{ if (entryPoint.requiresAtomic32Pragma() && !all32AtomicsAvailable) { - return fallBackToNextDevice(_settings, "32 bit Atomics required but not supported"); + return fallBackToNextDevice(device, _settings, "32 bit Atomics required but not supported"); } String openCL; @@ -1509,20 +1509,20 @@ public class KernelRunner extends KernelRunnerJNI{ else if (Config.enableShowGeneratedOpenCL) { System.out.println(openCL); } - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); openCLCache.put(kernel.getClass(), openCL); } catch (final CodeGenException codeGenException) { openCLCache.put(kernel.getClass(), CODE_GEN_ERROR_MARKER); - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); - return fallBackToNextDevice(_settings, codeGenException); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); + return fallBackToNextDevice(device, _settings, codeGenException); } } else { 
if (openCL.equals(CODE_GEN_ERROR_MARKER)) { - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); boolean silently = true; // since we must have already reported the CodeGenException - return fallBackToNextDevice(_settings, null, silently); + return fallBackToNextDevice(device, _settings, null, silently); } } } @@ -1547,9 +1547,9 @@ public class KernelRunner extends KernelRunnerJNI{ } } } - _settings.profile.onEvent(ProfilingEvent.OPENCL_COMPILED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_COMPILED); if (handle == 0) { - return fallBackToNextDevice(_settings, "OpenCL compile failed"); + return fallBackToNextDevice(device, _settings, "OpenCL compile failed"); } args = new KernelArg[entryPoint.getReferencedFields().size()]; @@ -1600,7 +1600,7 @@ public class KernelRunner extends KernelRunnerJNI{ try { setMultiArrayType(args[i], type); } catch (AparapiException e) { - return fallBackToNextDevice(_settings, "failed to set kernel arguement " + return fallBackToNextDevice(device, _settings, "failed to set kernel arguement " + args[i].getName() + ". 
Aparapi only supports 2D and 3D arrays."); } } else { @@ -1677,27 +1677,27 @@ public class KernelRunner extends KernelRunnerJNI{ argc = i; setArgsJNI(jniContextHandle, args, argc); - _settings.profile.onEvent(ProfilingEvent.PREPARE_EXECUTE); + _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE); try { - executeOpenCL(_settings); + executeOpenCL(device, _settings); isFallBack = false; } catch (final AparapiException e) { - fallBackToNextDevice(_settings, e); + fallBackToNextDevice(device, _settings, e); } } else { // (entryPoint != null) && !entryPoint.shouldFallback() - fallBackToNextDevice(_settings, "failed to locate entrypoint"); + fallBackToNextDevice(device, _settings, "failed to locate entrypoint"); } } else { // (entryPoint == null) || (isFallBack) try { - executeOpenCL(_settings); + executeOpenCL(device, _settings); isFallBack = false; } catch (final AparapiException e) { - fallBackToNextDevice(_settings, e); + fallBackToNextDevice(device, _settings, e); } } } else { // isOpenCL if (!(device instanceof JavaDevice)) { - fallBackToNextDevice(_settings, "Non-OpenCL Kernel.EXECUTION_MODE requested but device is not a JavaDevice "); + fallBackToNextDevice(device, _settings, "Non-OpenCL Kernel.EXECUTION_MODE requested but device is not a JavaDevice "); } executeJava(_settings, (JavaDevice) device); } @@ -1709,7 +1709,7 @@ public class KernelRunner extends KernelRunnerJNI{ return kernel; } finally { - _settings.profile.onEvent(ProfilingEvent.EXECUTED); + _settings.profile.onEvent(device, ProfilingEvent.EXECUTED); maybeReportProfile(_settings); } }