diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bd859387edb83bd6ba9d4a2c4a0c5c2c7f797b..981c2e29a474a70324ef3b9292de7f722e967189 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 1.8.0 * Updated KernelManager to facilitate class extensions having constructors with non static parameters +* Enable kernel profiling and execution simultaneously on multiple devices (multiple threads calling same kernel class on multiple devices) ## 1.7.0 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f0959e8c337122f4c6b9d2634b7025db014a94f2..84040ab351b91d7d553733e267789bf4fcec7a37 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -48,4 +48,5 @@ Below are some of the specific details of various contributions. * Luis Mendes submited PR for issue #84 - Fully support OpenCL 1.2 barrier() - localBarrier(), globalBarrier() and localGlobalBarrier() * Luis Mendes with suggestions by Automenta submited PR for issue #62 and implemented new thread-safe API for Kernel profiling * Luis Mendes submited PR for issue #101 - Possible deadlock in JTP mode -* Luis Mendes submited PR to facilitate KernelManager class extension with non-static parameters in constructors \ No newline at end of file +* Luis Mendes submited PR to facilitate KernelManager class extension with non-static parameters in constructors +* Luis Mendes submited PR to Enable kernel profiling and execution simultaneously on multiple devices \ No newline at end of file diff --git a/pom.xml b/pom.xml index 1d1d5150d7ce15d391baa9b6e28fb806272654bf..122f5949406867ae2b71de50370b33599bcc22c1 100644 --- a/pom.xml +++ b/pom.xml @@ -87,7 +87,7 @@ <dependency> <groupId>com.aparapi</groupId> <artifactId>aparapi-jni</artifactId> - <version>1.1.2</version> + <version>1.2.0</version> </dependency> <dependency> <groupId>junit</groupId> diff --git a/src/main/java/com/aparapi/device/Device.java b/src/main/java/com/aparapi/device/Device.java index f15835e6eb9a5452a5f2a22e73828048ba1a03d6..2e43c43c718958bf7359ad36fde02dbc3f7a358b 
100644 --- a/src/main/java/com/aparapi/device/Device.java +++ b/src/main/java/com/aparapi/device/Device.java @@ -18,7 +18,7 @@ package com.aparapi.device; import com.aparapi.*; import com.aparapi.internal.kernel.*; -public abstract class Device{ +public abstract class Device implements Comparable<Device> { public static enum TYPE { UNKNOWN(Integer.MAX_VALUE), @@ -179,4 +179,20 @@ public abstract class Device{ public int hashCode() { return Long.valueOf(getDeviceId()).hashCode(); } + + public int compareTo(Device other) { + if (type.rank < other.type.rank) { + return -1; + } else if (type.rank > other.type.rank) { + return 1; + } + + if (getDeviceId() < other.getDeviceId()) { + return -1; + } else if (getDeviceId() > other.getDeviceId()) { + return 1; + } + + return 0; + } } diff --git a/src/main/java/com/aparapi/device/JavaDevice.java b/src/main/java/com/aparapi/device/JavaDevice.java index 4b687dd6b48faad5e319e0ee1781b94929d40c74..a392cdf162579f1a51d23f0e5d815600f1a3c2e3 100644 --- a/src/main/java/com/aparapi/device/JavaDevice.java +++ b/src/main/java/com/aparapi/device/JavaDevice.java @@ -15,7 +15,7 @@ */ package com.aparapi.device; -public class JavaDevice extends Device { +public class JavaDevice extends Device implements Comparable<Device> { public static final JavaDevice THREAD_POOL = new JavaDevice(TYPE.JTP, "Java Thread Pool", -3); public static final JavaDevice ALTERNATIVE_ALGORITHM = new JavaDevice(TYPE.ALT, "Java Alternative Algorithm", -2); diff --git a/src/main/java/com/aparapi/device/OpenCLDevice.java b/src/main/java/com/aparapi/device/OpenCLDevice.java index 5288e38172b8275225a6abb1225dc8abf7fec3ac..a105540e54ba508c8dac8e79e5bfc7424d847de4 100644 --- a/src/main/java/com/aparapi/device/OpenCLDevice.java +++ b/src/main/java/com/aparapi/device/OpenCLDevice.java @@ -45,7 +45,7 @@ import com.aparapi.opencl.OpenCL.Local; import com.aparapi.opencl.OpenCL.Resource; import com.aparapi.opencl.OpenCL.Source; -public class OpenCLDevice extends Device{ +public 
class OpenCLDevice extends Device implements Comparable<Device> { private final OpenCLPlatform platform; diff --git a/src/main/java/com/aparapi/internal/kernel/KernelManager.java b/src/main/java/com/aparapi/internal/kernel/KernelManager.java index 7640a9af9785f383fb0d283f7da6c9c163a236d3..35a20e2c3a67d7e66b6672c2647e413dcec5738b 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelManager.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelManager.java @@ -48,7 +48,7 @@ public class KernelManager { /** * Default KernelManager initialization.<br/> - * Convenience method for being overriden to an empty implementation, so that derived + * Convenience method for being overridden to an empty implementation, so that derived * KernelManager classes can provide non static parameters to their constructors. */ protected void setup() { diff --git a/src/main/java/com/aparapi/internal/kernel/KernelPreferences.java b/src/main/java/com/aparapi/internal/kernel/KernelPreferences.java index 7c553390fde4758450579a62f4d1f62817b74882..28f307dcf0ceca674e486949e5d22daae56b0dc9 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelPreferences.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelPreferences.java @@ -19,11 +19,15 @@ import com.aparapi.*; import com.aparapi.device.*; import java.util.*; +import java.util.concurrent.atomic.AtomicReference; +/** + * Thread safe class holding the kernel preferences for a given kernel class. + */ public class KernelPreferences { private final Class<? extends Kernel> kernelClass; private final KernelManager manager; - private volatile LinkedList<Device> preferredDevices = null; + private final AtomicReference<LinkedList<Device>> preferredDevices = new AtomicReference<>(null); private final LinkedHashSet<Device> failedDevices = new LinkedHashSet<>(); public KernelPreferences(KernelManager manager, Class<? 
extends Kernel> kernelClass) { @@ -39,14 +43,16 @@ public class KernelPreferences { public List<Device> getPreferredDevices(Kernel kernel) { maybeSetUpDefaultPreferredDevices(); + ArrayList<Device> copy; + synchronized (this) { + copy = new ArrayList<>(preferredDevices.get()); + } + if (kernel == null) { - return Collections.unmodifiableList(preferredDevices); + return Collections.unmodifiableList(copy); } + List<Device> localPreferredDevices = new ArrayList<>(); - ArrayList<Device> copy; - synchronized (preferredDevices) { - copy = new ArrayList(preferredDevices); - } for (Device device : copy) { if (kernel.isAllowDevice(device)) { localPreferredDevices.add(device); @@ -56,12 +62,12 @@ public class KernelPreferences { } synchronized void setPreferredDevices(LinkedHashSet<Device> _preferredDevices) { - if (preferredDevices != null) { - preferredDevices.clear(); - preferredDevices.addAll(_preferredDevices); + if (preferredDevices.get() != null) { + preferredDevices.get().clear(); + preferredDevices.get().addAll(_preferredDevices); } else { - preferredDevices = new LinkedList<>(_preferredDevices); + preferredDevices.set(new LinkedList<>(_preferredDevices)); } failedDevices.clear(); } @@ -72,22 +78,18 @@ public class KernelPreferences { } synchronized void markPreferredDeviceFailed() { - if (preferredDevices.size() > 0) { - failedDevices.add(preferredDevices.remove(0)); + if (preferredDevices.get().size() > 0) { + failedDevices.add(preferredDevices.get().remove(0)); } } private void maybeSetUpDefaultPreferredDevices() { - if (preferredDevices == null) { - synchronized (this) { - if (preferredDevices == null) { - preferredDevices = new LinkedList<>(manager.getDefaultPreferences().getPreferredDevices(null)); - } - } - } + if (preferredDevices.get() == null) { + preferredDevices.compareAndSet(null, new LinkedList<>(manager.getDefaultPreferences().getPreferredDevices(null))); + } } - public List<Device> getFailedDevices() { + public synchronized List<Device> 
getFailedDevices() { return new ArrayList<>(failedDevices); } } diff --git a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java index 6803d8850b4ee7792422091e6c1a2f8424001240..933b3f2bb7d3a79c2c31037674963f1109d76b75 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java @@ -19,23 +19,22 @@ import com.aparapi.*; import com.aparapi.device.*; import java.util.*; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicReference; import java.util.logging.*; /** - * Collects profiling information per kernel class per device. Not thread safe, it is necessary for client code to correctly synchronize on - * objects of this class. + * Collects profiling information per kernel class per device. */ public class KernelProfile { public static final double MILLION = 1000000d; private static Logger logger = Logger.getLogger(Config.getLoggerName()); private final Class<? extends Kernel> kernelClass; - private LinkedHashMap<Device, KernelDeviceProfile> deviceProfiles = new LinkedHashMap<>(); - private Device currentDevice; - private Device lastDevice; - private KernelDeviceProfile currentDeviceProfile; + private ConcurrentSkipListMap<Device, KernelDeviceProfile> deviceProfiles = new ConcurrentSkipListMap<>(); + private final AtomicReference<Device> currentDevice = new AtomicReference<Device>(null); private IProfileReportObserver observer; - + public KernelProfile(Class<? extends Kernel> _kernelClass) { kernelClass = _kernelClass; } @@ -60,24 +59,47 @@ public class KernelProfile { } } + /** + * Retrieves the last device profile that was updated by the last thread that made + * a profiling information update, when executing this kernel on the specified device. 
+ * @return the device profile + */ public KernelDeviceProfile getLastDeviceProfile() { - return deviceProfiles.get(currentDevice); + return deviceProfiles.get(currentDevice.get()); } + /** + * Starts a profiling information gathering sequence for the current thread invoking this method + * regarding the specified execution device. + * @param device + */ void onStart(Device device) { - synchronized (deviceProfiles) { - currentDeviceProfile = deviceProfiles.get(device); - if (currentDeviceProfile == null) { - currentDeviceProfile = new KernelDeviceProfile(this, kernelClass, device); - deviceProfiles.put(device, currentDeviceProfile); + KernelDeviceProfile currentDeviceProfile = deviceProfiles.get(device); + if (currentDeviceProfile == null) { + currentDeviceProfile = new KernelDeviceProfile(this, kernelClass, device); + KernelDeviceProfile existingProfile = deviceProfiles.putIfAbsent(device, currentDeviceProfile); + if (existingProfile != null) { + currentDeviceProfile = existingProfile; } } currentDeviceProfile.onEvent(ProfilingEvent.START); - currentDevice = device; + currentDevice.set(device); } - void onEvent(ProfilingEvent event) { + /** + * Updates the profiling information for the current thread invoking this method regarding + * the specified execution device. 
+ * + * @param device the device where the kernel is/was executed; when null the event is logged and discarded + * @param event the event for which the profiling information is being updated + */ + void onEvent(Device device, ProfilingEvent event) { + if (device == null) { + logger.log(Level.WARNING, "Discarding profiling event " + event + " for null device, for Kernel class: " + kernelClass.getName()); + return; + } + final KernelDeviceProfile deviceProfile = deviceProfiles.get(device); switch (event) { case CLASS_MODEL_BUILT: // fallthrough case OPENCL_GENERATED: // fallthrough @@ -86,10 +108,10 @@ public class KernelProfile { case PREPARE_EXECUTE: // fallthrough case EXECUTED: // fallthrough { - if (currentDeviceProfile == null) { + if (deviceProfile == null) { logger.log(Level.SEVERE, "Error in KernelProfile, no currentDevice (synchronization error?"); } - currentDeviceProfile.onEvent(event); + deviceProfile.onEvent(event); break; } case START: @@ -99,16 +121,6 @@ public class KernelProfile { } } - void onFinishedExecution() { - reset(); - } - - private void reset() { - lastDevice = currentDevice; - currentDevice = null; - currentDeviceProfile = null; - } - public Collection<Device> getDevices() { return deviceProfiles.keySet(); } diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java index bc324d17f78fe902bc5e801ba4ccb5d8319b18a3..58960925a578a0d2f327b2208178214f90754280 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java @@ -366,7 +366,7 @@ public class KernelRunner extends KernelRunnerJNI{ boolean legacySequentialMode = kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.SEQ); passId = PASS_ID_PREPARING_EXECUTION; - _settings.profile.onEvent(ProfilingEvent.PREPARE_EXECUTE); + _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE); try { if (device == JavaDevice.ALTERNATIVE_ALGORITHM) { @@ -376,7 +376,7 @@ public class
KernelRunner extends KernelRunnerJNI{ } } else { boolean silently = true; // not having an alternative algorithm is the normal state, and does not need reporting - fallBackToNextDevice(_settings, (Exception) null, silently); + fallBackToNextDevice(device, _settings, (Exception) null, silently); } } else { final int localSize0 = _settings.range.getLocalSize(0); @@ -1214,7 +1214,7 @@ public class KernelRunner extends KernelRunnerJNI{ } @SuppressWarnings("deprecation") - private Kernel executeOpenCL(ExecutionSettings _settings) throws AparapiException { + private Kernel executeOpenCL(Device device, ExecutionSettings _settings) throws AparapiException { // Read the array refs after kernel may have changed them // We need to do this as input to computing the localSize @@ -1228,7 +1228,7 @@ public class KernelRunner extends KernelRunnerJNI{ int returnValue = runKernelJNI(jniContextHandle, _settings.range, needSync, _settings.passes, inBufferRemote, outBufferRemote); if (returnValue != 0) { String reason = "OpenCL execution seems to have failed (runKernelJNI returned " + returnValue + ")"; - return fallBackToNextDevice(_settings, new AparapiException(reason)); + return fallBackToNextDevice(device, _settings, new AparapiException(reason)); } if (usesOopConversion == true) { @@ -1282,19 +1282,19 @@ public class KernelRunner extends KernelRunnerJNI{ } } - private Kernel fallBackToNextDevice(ExecutionSettings _settings, String _reason) { - return fallBackToNextDevice(_settings, new AparapiException(_reason)); + private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, String _reason) { + return fallBackToNextDevice(device, _settings, new AparapiException(_reason)); } @SuppressWarnings("deprecation") - synchronized private Kernel fallBackToNextDevice(ExecutionSettings _settings, Exception _exception) { - return fallBackToNextDevice(_settings, _exception, false); + synchronized private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, 
Exception _exception) { + return fallBackToNextDevice(device, _settings, _exception, false); } @SuppressWarnings("deprecation") - synchronized private Kernel fallBackToNextDevice(ExecutionSettings _settings, Exception _exception, boolean _silently) { + synchronized private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, Exception _exception, boolean _silently) { isFallBack = true; - _settings.profile.onEvent(ProfilingEvent.EXECUTED); + _settings.profile.onEvent(device, ProfilingEvent.EXECUTED); if (_settings.legacyExecutionMode) { if (!_silently && logger.isLoggable(Level.WARNING)) { logger.warning("Execution mode " + kernel.getExecutionMode() + " failed for " + kernel + ": " + _exception.getMessage()); @@ -1307,6 +1307,8 @@ public class KernelRunner extends KernelRunnerJNI{ logger.warning("Device failed for " + kernel + ": " + _exception.getMessage()); } + //This method is synchronized thus ensuring thread safety on concurrent executions of the same kernel class, + //since preferences is shared between such threads. preferences.markPreferredDeviceFailed(); // Device nextDevice = preferences.getPreferredDevice(kernel); @@ -1337,11 +1339,7 @@ public class KernelRunner extends KernelRunnerJNI{ boolean legacyExecutionMode = kernel.getExecutionMode() != Kernel.EXECUTION_MODE.AUTO; ExecutionSettings settings = new ExecutionSettings(preferences, profile, _entrypoint, _range, _passes, legacyExecutionMode); - // Two Kernels of the same class share the same KernelPreferences object, and since failure (fallback) generally mutates - // the preferences object, we must lock it. Note this prevents two Kernels of the same class executing simultaneously. 
- synchronized (preferences) { - return executeInternalOuter(settings); - } + return executeInternalOuter(settings); } finally { executing = false; clearCancelMultiPass(); @@ -1368,7 +1366,7 @@ public class KernelRunner extends KernelRunnerJNI{ EXECUTION_MODE requestedExecutionMode = kernel.getExecutionMode(); if (requestedExecutionMode.isOpenCL() && _settings.range.getDevice() != null && !(_settings.range.getDevice() instanceof OpenCLDevice)) { - fallBackToNextDevice(_settings, "OpenCL EXECUTION_MODE was requested but Device supplied was not an OpenCLDevice"); + fallBackToNextDevice(_settings.range.getDevice(), _settings, "OpenCL EXECUTION_MODE was requested but Device supplied was not an OpenCLDevice"); } Device device = _settings.range.getDevice(); @@ -1407,20 +1405,20 @@ public class KernelRunner extends KernelRunnerJNI{ openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.bestGPU(); jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "GPU request can't be honored, no GPU device"); + return fallBackToNextDevice(null, _settings, "GPU request can't be honored, no GPU device"); } } else if (requestedExecutionMode.equals(EXECUTION_MODE.ACC)) { // Get the best ACC openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.bestACC(); jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. 
if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "ACC request can't be honored, no ACC device"); + return fallBackToNextDevice(null, _settings, "ACC request can't be honored, no ACC device"); } } else { // We fetch the first CPU device openCLDevice = (OpenCLDevice) KernelManager.DeprecatedMethods.firstDevice(Device.TYPE.CPU); if (openCLDevice == null) { - return fallBackToNextDevice(_settings, "CPU request can't be honored, no CPU device"); + return fallBackToNextDevice(null, _settings, "CPU request can't be honored, no CPU device"); } } } else { @@ -1443,10 +1441,10 @@ public class KernelRunner extends KernelRunnerJNI{ try { final ClassModel classModel = ClassModel.createClassModel(kernel.getClass()); entryPoint = classModel.getEntrypoint(_settings.entrypoint, kernel); - _settings.profile.onEvent(ProfilingEvent.CLASS_MODEL_BUILT); + _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT); } catch (final Exception exception) { - _settings.profile.onEvent(ProfilingEvent.CLASS_MODEL_BUILT); - return fallBackToNextDevice(_settings, exception); + _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT); + return fallBackToNextDevice(device, _settings, exception); } } @@ -1461,11 +1459,11 @@ public class KernelRunner extends KernelRunnerJNI{ // Init the device to check capabilities before emitting the // code that requires the capabilities. jniContextHandle = initJNI(kernel, openCLDevice, jniFlags); // openCLDevice will not be null here - _settings.profile.onEvent(ProfilingEvent.INIT_JNI); + _settings.profile.onEvent(device, ProfilingEvent.INIT_JNI); } // end of synchronized! 
issue 68 if (jniContextHandle == 0) { - return fallBackToNextDevice(_settings, "initJNI failed to return a valid handle"); + return fallBackToNextDevice(device, _settings, "initJNI failed to return a valid handle"); } final String extensions = getExtensionsJNI(jniContextHandle); @@ -1481,11 +1479,11 @@ public class KernelRunner extends KernelRunnerJNI{ } if (entryPoint.requiresDoublePragma() && !hasFP64Support()) { - return fallBackToNextDevice(_settings, "FP64 required but not supported"); + return fallBackToNextDevice(device, _settings, "FP64 required but not supported"); } if (entryPoint.requiresByteAddressableStorePragma() && !hasByteAddressableStoreSupport()) { - return fallBackToNextDevice(_settings, "Byte addressable stores required but not supported"); + return fallBackToNextDevice(device, _settings, "Byte addressable stores required but not supported"); } final boolean all32AtomicsAvailable = hasGlobalInt32BaseAtomicsSupport() @@ -1494,7 +1492,7 @@ public class KernelRunner extends KernelRunnerJNI{ if (entryPoint.requiresAtomic32Pragma() && !all32AtomicsAvailable) { - return fallBackToNextDevice(_settings, "32 bit Atomics required but not supported"); + return fallBackToNextDevice(device, _settings, "32 bit Atomics required but not supported"); } String openCL; @@ -1509,20 +1507,20 @@ public class KernelRunner extends KernelRunnerJNI{ else if (Config.enableShowGeneratedOpenCL) { System.out.println(openCL); } - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); openCLCache.put(kernel.getClass(), openCL); } catch (final CodeGenException codeGenException) { openCLCache.put(kernel.getClass(), CODE_GEN_ERROR_MARKER); - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); - return fallBackToNextDevice(_settings, codeGenException); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); + return fallBackToNextDevice(device, _settings, codeGenException); } } else { 
if (openCL.equals(CODE_GEN_ERROR_MARKER)) { - _settings.profile.onEvent(ProfilingEvent.OPENCL_GENERATED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED); boolean silently = true; // since we must have already reported the CodeGenException - return fallBackToNextDevice(_settings, null, silently); + return fallBackToNextDevice(device, _settings, null, silently); } } } @@ -1547,9 +1545,9 @@ public class KernelRunner extends KernelRunnerJNI{ } } } - _settings.profile.onEvent(ProfilingEvent.OPENCL_COMPILED); + _settings.profile.onEvent(device, ProfilingEvent.OPENCL_COMPILED); if (handle == 0) { - return fallBackToNextDevice(_settings, "OpenCL compile failed"); + return fallBackToNextDevice(device, _settings, "OpenCL compile failed"); } args = new KernelArg[entryPoint.getReferencedFields().size()]; @@ -1600,7 +1598,7 @@ public class KernelRunner extends KernelRunnerJNI{ try { setMultiArrayType(args[i], type); } catch (AparapiException e) { - return fallBackToNextDevice(_settings, "failed to set kernel arguement " + return fallBackToNextDevice(device, _settings, "failed to set kernel arguement " + args[i].getName() + ". 
Aparapi only supports 2D and 3D arrays."); } } else { @@ -1677,27 +1675,27 @@ public class KernelRunner extends KernelRunnerJNI{ argc = i; setArgsJNI(jniContextHandle, args, argc); - _settings.profile.onEvent(ProfilingEvent.PREPARE_EXECUTE); + _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE); try { - executeOpenCL(_settings); + executeOpenCL(device, _settings); isFallBack = false; } catch (final AparapiException e) { - fallBackToNextDevice(_settings, e); + fallBackToNextDevice(device, _settings, e); } } else { // (entryPoint != null) && !entryPoint.shouldFallback() - fallBackToNextDevice(_settings, "failed to locate entrypoint"); + fallBackToNextDevice(device, _settings, "failed to locate entrypoint"); } } else { // (entryPoint == null) || (isFallBack) try { - executeOpenCL(_settings); + executeOpenCL(device, _settings); isFallBack = false; } catch (final AparapiException e) { - fallBackToNextDevice(_settings, e); + fallBackToNextDevice(device, _settings, e); } } } else { // isOpenCL if (!(device instanceof JavaDevice)) { - fallBackToNextDevice(_settings, "Non-OpenCL Kernel.EXECUTION_MODE requested but device is not a JavaDevice "); + fallBackToNextDevice(device, _settings, "Non-OpenCL Kernel.EXECUTION_MODE requested but device is not a JavaDevice "); } executeJava(_settings, (JavaDevice) device); } @@ -1709,7 +1707,7 @@ public class KernelRunner extends KernelRunnerJNI{ return kernel; } finally { - _settings.profile.onEvent(ProfilingEvent.EXECUTED); + _settings.profile.onEvent(device, ProfilingEvent.EXECUTED); maybeReportProfile(_settings); } }