diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fc672cc593ebd4f148306cb9d575496cd6971c2..a527df9d24ad082217bc9bf68b78bb7efa427985 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ * If statements with empty blocks and comparisons outside of if or while statements now compile and run on the GPU. * Fix kernel management inconsistencies regarding preferred devices management +* Fix Java execution mode with barriers to not deadlock when a thread dies or is interrupted (InterruptedException) +* Fix Java execution mode to fail-fast when Kernel execution fails +* Java execution mode now provides detailed backtraces of failed Kernel threads including passId, groupIds, globalIds and localIds ## 1.9.0 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 2c1464ec1fda8e941cbb1f6015ee16f7957266d9..5545915031a26c1c0d63359ea16346ac323a9cd3 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -52,4 +52,7 @@ Below are some of the specific details of various contributions. * Luis Mendes submited PR to Enable kernel profiling and execution simultaneously on multiple devices * Luis Mendes submited PR to fix issue #78 - Signed integer constants were interpreted as unsigned values for instruction SIPUSH * Luis Mendes submited PR to Support for OpenCLDevice configurator/configure API -* Luis mendes submited PR to fix kernel management inconsistencies regarding preferred devices management \ No newline at end of file +* Luis Mendes submited PR to fix kernel management inconsistencies regarding preferred devices management +* Luis Mendes submited PR to Fix Java execution mode with barriers to not deadlock when a thread dies or is interrupted (InterruptedException) +* Luis Mendes submited PR to Fix Java execution mode to fail-fast when Kernel execution fails +* Luis Mendes submited PR to Java execution mode now provides detailed backtraces of failed Kernel threads including passId, groupIds, globalIds and localIds diff --git a/src/main/java/com/aparapi/Kernel.java b/src/main/java/com/aparapi/Kernel.java index a41e2eab5aa634cd5c381943f9895f63c83d55bf..dc96f2e101a79c545747f6cbfadbbaa50088e5f8 100644 --- a/src/main/java/com/aparapi/Kernel.java +++ b/src/main/java/com/aparapi/Kernel.java @@ -79,13 +79,17 @@ import java.util.List; import java.util.Map; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinPool.ManagedBlocker; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.IntBinaryOperator; import java.util.logging.Logger; import com.aparapi.device.Device; import com.aparapi.device.JavaDevice; import com.aparapi.device.OpenCLDevice; +import com.aparapi.internal.kernel.IKernelBarrier; import com.aparapi.internal.kernel.KernelArg; import com.aparapi.internal.kernel.KernelDeviceProfile; import com.aparapi.internal.kernel.KernelManager; @@ -514,9 +518,7 @@ public abstract class Kernel implements Cloneable { private int passId; - private volatile CyclicBarrier localBarrier; - - private boolean localBarrierDisabled; + private final AtomicReference<IKernelBarrier> localBarrier = new AtomicReference<IKernelBarrier>(); /** * Default constructor @@ -534,7 +536,7 @@ public abstract class Kernel implements Cloneable { groupIds = kernelState.getGroupIds(); range = kernelState.getRange(); passId = kernelState.getPassId(); - localBarrier = kernelState.getLocalBarrier(); + localBarrier.set(kernelState.getLocalBarrier()); } /** @@ -640,34 +642,72 @@ public abstract class Kernel implements Cloneable { /** * @return the localBarrier */ - public CyclicBarrier getLocalBarrier() { - return localBarrier; + public IKernelBarrier getLocalBarrier() { + return localBarrier.get(); } /** * @param localBarrier the localBarrier to set */ - public void setLocalBarrier(CyclicBarrier localBarrier) { - this.localBarrier = localBarrier; + public void setLocalBarrier(IKernelBarrier localBarrier) { + this.localBarrier.set(localBarrier); } public void awaitOnLocalBarrier() { - if (!localBarrierDisabled) { - try { - kernelState.getLocalBarrier().await(); - } catch (final InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (final BrokenBarrierException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } + boolean completed = false; + final IKernelBarrier barrier = localBarrier.get(); + while (!completed && barrier != null) { + try { + ForkJoinPool.managedBlock(barrier); //ManagedBlocker already has to be reentrant + completed = true; + } catch (InterruptedException ex) { + //Empty on purpose, either barrier is disabled on InterruptedException or lock will have to complete + } + } } public void disableLocalBarrier() { - localBarrierDisabled = true; + final IKernelBarrier barrier = localBarrier.getAndSet(null); + if (barrier != null) { + barrier.cancelBarrier(); + } } + + public String describe() { + final StringBuilder sb = new StringBuilder(100); + sb.append("Pass Id: "); + sb.append(passId); + sb.append(" - Group IDs: ["); + boolean first = true; + for (int groupId : groupIds) { + if (!first) { + sb.append(", "); + } + sb.append(groupId); + first = false; + } + sb.append("] - Global IDs: ["); + first = true; + for (int globalId : globalIds) { + if (!first) { + sb.append(", "); + } + sb.append(globalId); + first = false; + } + sb.append("], Local IDs: ["); + first = true; + for (int localId : localIds) { + if (!first) { + sb.append(", "); + } + sb.append(localId); + first = false; + } + sb.append("]"); + + return sb.toString(); + } } /** diff --git a/src/main/java/com/aparapi/exception/AparapiBrokenBarrierException.java b/src/main/java/com/aparapi/exception/AparapiBrokenBarrierException.java new file mode 100644 index 0000000000000000000000000000000000000000..afc128b4b02656d474d32f0e6ffb5bf19f2eec4d --- /dev/null +++ b/src/main/java/com/aparapi/exception/AparapiBrokenBarrierException.java @@ -0,0 +1,54 @@ +/** + * Copyright (c) 2016 - 2018 Syncleus, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.aparapi.exception; + +/** + * Exception is thrown when an Aparapi kernel is executing in Java mode, + * and a barrier cannot be completed due to threads that die during its execution. + * + * Aparapi ensures execution won't deadlock due to such an event, resulting in + * fail-fast code, and allowing easy perception of the failure cause. + * + * One such case is when an Aparapi kernel accesses outside a valid array position. + * + * + * @author CoreRasurae + */ +public class AparapiBrokenBarrierException extends RuntimeException { + + /** + * + */ + private static final long serialVersionUID = 1472616497910700885L; + + public AparapiBrokenBarrierException(String message) { + super(message); + } + + public AparapiBrokenBarrierException(Throwable cause) { + super(cause); + } + + public AparapiBrokenBarrierException(String message, Throwable cause) { + super(message, cause); + } + + public AparapiBrokenBarrierException(String message, Throwable cause, boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/src/main/java/com/aparapi/exception/AparapiKernelFailedException.java b/src/main/java/com/aparapi/exception/AparapiKernelFailedException.java new file mode 100644 index 0000000000000000000000000000000000000000..67499a1d06b2eb6bfef2224f44955dd06b552c8c --- /dev/null +++ b/src/main/java/com/aparapi/exception/AparapiKernelFailedException.java @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2016 - 2018 Syncleus, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.aparapi.exception; + +/** + * This exception is thrown when a Java kernel execution fails. + * + * It is intended to convey information about the local and global Ids of the thread that failed, while + * avoiding from polluting the relevant stack trace. + * + * One such example is when a Java kernel accesses outside an array due to an out of bounds index. + * + * @author CoreRasurae + */ +public class AparapiKernelFailedException extends RuntimeException { + + /** + * + */ + private static final long serialVersionUID = -7880680698955519049L; + + public AparapiKernelFailedException(String message) { + super(message); + } + + public AparapiKernelFailedException(String message, Throwable cause) { + super(message, cause); + } + + public AparapiKernelFailedException(String message, Throwable cause, boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + + @Override + public synchronized Throwable fillInStackTrace() { + //Suppress the stack trace of these exceptions + return null; + } +} diff --git a/src/main/java/com/aparapi/internal/kernel/IKernelBarrier.java b/src/main/java/com/aparapi/internal/kernel/IKernelBarrier.java new file mode 100644 index 0000000000000000000000000000000000000000..769c4082dd24ad6a01abcc9b51b944cdd39c97c8 --- /dev/null +++ b/src/main/java/com/aparapi/internal/kernel/IKernelBarrier.java @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2016 - 2018 Syncleus, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.aparapi.internal.kernel; + +import java.util.concurrent.ForkJoinPool.ManagedBlocker; + +/** + * Provides the interface for Aparapi Kernel barriers. + * + * @author CoreRasurae + */ +public interface IKernelBarrier extends ManagedBlocker { + /** + * Cancels the barrier. + * + * All threads that may be waiting for the barrier are released and barrier is permanently disabled. + */ + public void cancelBarrier(); + + /** + * Breaks the barrier. + * + * All threads that may be waiting for the barrier are released and will throw {@link com.aparapi.exception.AparapiBrokenBarrierException}. + * @param t the Throwable causing the barrier to break. + */ + public void breakBarrier(Throwable e); +} diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java index 3c0386d216c3658dd857a0810a05964252dafe22..c7bac631d65b95c4f68ed58d61ef2cd613edff4d 100644 --- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java +++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java @@ -56,6 +56,8 @@ import com.aparapi.*; import com.aparapi.Kernel.Constant; import com.aparapi.Kernel.*; import com.aparapi.device.*; +import com.aparapi.exception.AparapiBrokenBarrierException; +import com.aparapi.exception.AparapiKernelFailedException; import com.aparapi.internal.annotation.*; import com.aparapi.internal.exception.*; import com.aparapi.internal.instruction.InstructionSet.*; @@ -71,7 +73,9 @@ import java.nio.*; import java.util.*; import java.util.concurrent.*; import java.util.concurrent.ForkJoinPool.*; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicIntegerArray; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.*; @@ -311,42 +315,80 @@ public class KernelRunner extends KernelRunnerJNI{ return capabilitiesSet.contains(OpenCL.CL_KHR_GL_SHARING); } - private static final class FJSafeCyclicBarrier extends CyclicBarrier{ - FJSafeCyclicBarrier(final int threads) { - super(threads); + private static final class FJSafeBarrier implements IKernelBarrier { + final int threads; + final AtomicBoolean brokenBarrier = new AtomicBoolean(false); + final AtomicBoolean canceled = new AtomicBoolean(false); + final Object lock = new Object(); + int remainingThreads; + + FJSafeBarrier(final int threads) { + remainingThreads = threads; + this.threads = threads; } - - @Override public int await() throws InterruptedException, BrokenBarrierException { - class Awaiter implements ManagedBlocker{ - private int value; - - private boolean released; - - @Override public boolean block() throws InterruptedException { - try { - value = superAwait(); - released = true; - return true; - } catch (final BrokenBarrierException e) { - throw new RuntimeException(e); - } - } - - @Override public boolean isReleasable() { - return released; - } - - int getValue() { - return value; - } - } - final Awaiter awaiter = new Awaiter(); - ForkJoinPool.managedBlock(awaiter); - return awaiter.getValue(); + + /** + * Should be called by worker threads when a fatal exception occurs, + * so that all threads fail-fast and no deadlock occurs. + */ + public void breakBarrier(final Throwable e) { + synchronized (lock) { + brokenBarrier.set(true); + lock.notifyAll(); + } + } + + public void cancelBarrier() { + synchronized (lock) { + remainingThreads = threads; + canceled.set(true); + lock.notifyAll(); + } } - int superAwait() throws InterruptedException, BrokenBarrierException { - return super.await(); + @Override + public boolean block() throws InterruptedException { + if (canceled.get()) { + return true; + } + + synchronized (lock) { + remainingThreads--; + if (remainingThreads < 0) { + throw new Error("Thread count cannot be less than 0"); + } + + if (!brokenBarrier.get() && remainingThreads > 0) { + try { + lock.wait(); + } catch (InterruptedException e) { + //Either lock is already completed or thread will have to re-lock + if (remainingThreads != threads) { + //This thread will have to re-lock for the same barrier event + remainingThreads++; + throw e; + } + } + } else { + //InterruptedException will never occur here, thus lock completion is guaranteed + remainingThreads = threads; + lock.notifyAll(); + } + } + + //Breaking barrier at this point, allows barrier to be reused, if needed... + if (brokenBarrier.get()) { + throw new AparapiBrokenBarrierException("Barrier was broken"); + } + + //Threads are always ready to be released at this point + return true; + } + + @Override + public boolean isReleasable() { + //Ensure block() is always called by ForkJoinPool + return false; } } @@ -406,7 +448,7 @@ public class KernelRunner extends KernelRunnerJNI{ kernelState.setLocalId(0, 0); kernelState.setLocalId(1, 0); kernelState.setLocalId(2, 0); - kernelState.setLocalBarrier(new FJSafeCyclicBarrier(1)); + kernelState.setLocalBarrier(new FJSafeBarrier(1)); for (passId = 0; passId < _settings.passes; passId++) { if (getCancelState() == CANCEL_STATUS_TRUE) { @@ -469,7 +511,7 @@ public class KernelRunner extends KernelRunnerJNI{ * * This barrier is threadCount wide. We never hit the barrier from the dispatch thread. */ - final CyclicBarrier localBarrier = new FJSafeCyclicBarrier(threads); + final FJSafeBarrier localBarrier = new FJSafeBarrier(threads); final ThreadIdSetter threadIdSetter; @@ -678,9 +720,12 @@ public class KernelRunner extends KernelRunnerJNI{ threadIdSetter.set(kernelState, globalGroupId, threadId); kernelClone.run(); } - } - catch (RuntimeException | Error e) { - logger.log(Level.SEVERE, "Execution failed", e); + } catch (AparapiBrokenBarrierException e) { + //Intentionally empty to not obfuscate threads that failed executing the kernel with those that had + //the barrier broken by the first ones. + } catch (RuntimeException | Error e) { + localBarrier.breakBarrier(e); + throw new AparapiKernelFailedException(kernelState.describe(), e); } } }); @@ -688,7 +733,6 @@ public class KernelRunner extends KernelRunnerJNI{ tasks[id] = fjt; } - for (ForkJoinTask<?> task : tasks) { // This dispatch thread waits for all worker threads here. task.join(); } @@ -706,18 +750,6 @@ public class KernelRunner extends KernelRunnerJNI{ } } - private static void await(CyclicBarrier _barrier) { - try { - _barrier.await(); - } catch (final InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (final BrokenBarrierException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - private KernelArg[] args = null; private boolean usesOopConversion = false;