diff --git a/CHANGELOG.md b/CHANGELOG.md
index 138b20a095017bd8024cc933a7bf40f9d8fc2d44..00f2159e13189e12a381203a4886d4f4bed4a1bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,8 @@
 # Aparapi Changelog
 
 ## 1.6.0
-
 * Add support for Local arguments in kernel functions
+* Add full support for OpenCL 1.2 atomic operations on arrays of integers.
 * Parent pom no longer points to a snapshot.
 
 ## 1.5.0
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 47890903b7715701c30789aaa932412477949ee3..6b40c74b10cdcb103009a5bb4afd700e739b12c8 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -42,3 +42,4 @@ Below are some of the specific details of various contributions.
 * George Vinokhodov submited a fix for a bug regarding forward references.
 * Dmitriy Shabanov submited PR for inline array feature.
 * Luis Mendes submited PR to support passing functions arguments containing Local arrays - issue #79
+* Luis Mendes submitted PR for issue #81 - Full OpenCL 1.2 atomics support with AtomicInteger
diff --git a/src/main/java/com/aparapi/Kernel.java b/src/main/java/com/aparapi/Kernel.java
index 4e85b98d8f8b14de3338c459e01ed7c02556658a..98d63d03bf243eed77c7ebdd2566daa8b1396905 100644
--- a/src/main/java/com/aparapi/Kernel.java
+++ b/src/main/java/com/aparapi/Kernel.java
@@ -79,6 +79,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.BrokenBarrierException;
 import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.IntBinaryOperator;
 import java.util.logging.Logger;
 
 import com.aparapi.device.Device;
@@ -2307,6 +2309,110 @@ public abstract class Kernel implements Cloneable {
       }
    }
 
+   @OpenCLMapping(atomic32 = true)
+   protected final int atomicGet(AtomicInteger p) {
+	   return p.get(); // Java implementation: returns the current value held by p
+   }
+   
+   @OpenCLMapping(atomic32 = true)
+   protected final void atomicSet(AtomicInteger p, int val) {
+	   p.set(val); // Java implementation: unconditionally stores val into p
+   }
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_add")
+   protected final int atomicAdd(AtomicInteger p, int val) {
+	   return p.getAndAdd(val); // adds val to p and returns the value held BEFORE the addition
+   }
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_sub")
+   protected final int atomicSub(AtomicInteger p, int val) {
+	   return p.getAndAdd(-val); // subtracts by adding the negation; returns the value held BEFORE the subtraction
+   }
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_xchg")
+   protected final int atomicXchg(AtomicInteger p, int newVal) {
+	   return p.getAndSet(newVal); // stores newVal and returns the value it replaced
+   }
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_inc")
+   protected final int atomicInc(AtomicInteger p) {
+	   return p.getAndIncrement(); // increments p by one and returns the value held BEFORE the increment
+   }
+   
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_dec")
+   protected final int atomicDec(AtomicInteger p) {
+	   return p.getAndDecrement(); // decrements p by one and returns the value held BEFORE the decrement
+   }
+   
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_cmpxchg")
+   protected final int atomicCmpXchg(AtomicInteger p, int expectedVal, int newVal) {
+	   int current; // value observed in p; equals expectedVal only when the swap really happened
+	   while ((current = p.get()) == expectedVal && !p.compareAndSet(expectedVal, newVal)) {
+		   // CAS lost a race after the read above: re-read and retry instead of returning a later, unrelated value
+	   }
+	   return current; // old value, matching atomic_cmpxchg: a result != expectedVal means p was left untouched
+   }
+ 
+   private static final IntBinaryOperator minOperator = new IntBinaryOperator() { // accumulator used by atomicMin
+	  @Override
+	  public int applyAsInt(int oldVal, int newVal) {
+	  	 return Math.min(oldVal, newVal);
+	  }
+   };
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_min")
+   protected final int atomicMin(AtomicInteger p, int val) {
+	   return p.getAndAccumulate(val, minOperator); // p becomes min(p, val); the previous value is returned
+   }
+
+   private static final IntBinaryOperator maxOperator = new IntBinaryOperator() { // accumulator used by atomicMax
+	  @Override
+	  public int applyAsInt(int oldVal, int newVal) {
+		 return Math.max(oldVal, newVal);
+	  }	   
+   };
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_max")
+   protected final int atomicMax(AtomicInteger p, int val) {
+	   return p.getAndAccumulate(val, maxOperator); // p becomes max(p, val); the previous value is returned
+   }
+
+   private static final IntBinaryOperator andOperator = new IntBinaryOperator() { // accumulator used by atomicAnd
+	  @Override
+	  public int applyAsInt(int oldVal, int newVal) {
+		 return oldVal & newVal;
+	  }	   
+   };
+
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_and")
+   protected final int atomicAnd(AtomicInteger p, int val) {
+	   return p.getAndAccumulate(val, andOperator); // p becomes (p & val); the previous value is returned
+   }
+
+   private static final IntBinaryOperator orOperator = new IntBinaryOperator() { // accumulator used by atomicOr
+	  @Override
+	  public int applyAsInt(int oldVal, int newVal) {
+		 return oldVal | newVal;
+	  }	   
+   };
+   
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_or")
+   protected final int atomicOr(AtomicInteger p, int val) {
+	   return p.getAndAccumulate(val, orOperator); // p becomes (p | val); the previous value is returned
+   }
+
+   private static final IntBinaryOperator xorOperator = new IntBinaryOperator() { // accumulator used by atomicXor
+	  @Override
+	  public int applyAsInt(int oldVal, int newVal) {
+		 return oldVal ^ newVal;
+	  }	   
+   };
+   
+   @OpenCLMapping(atomic32 = true, mapTo = "atomic_xor")
+   protected final int atomicXor(AtomicInteger p, int val) {
+	   return p.getAndAccumulate(val, xorOperator); // p becomes (p ^ val); the previous value is returned
+   }
+
    /**
     * Wait for all kernels in the current group to rendezvous at this call before continuing execution.
     *
@@ -2661,8 +2767,13 @@ public abstract class Kernel implements Cloneable {
       final String strRetClass = retClass.toString();
       final String mapping = typeToLetterMap.get(strRetClass);
       // System.out.println("strRetClass = <" + strRetClass + ">, mapping = " + mapping);
-      if (mapping == null)
-         return "[" + retClass.getName() + ";";
+      if (mapping == null) {
+	  if (retClass.isArray()) {
+             return "[" + retClass.getName() + ";";
+    	  } else {
+             return "L" + retClass.getName() + ";";
+    	  }
+      }
       return mapping;
    }
 
@@ -3348,7 +3459,7 @@ public abstract class Kernel implements Cloneable {
 
    private static String toSignature(MethodReferenceEntry methodReferenceEntry) {
       NameAndTypeEntry nameAndTypeEntry = methodReferenceEntry.getNameAndTypeEntry();
-      return nameAndTypeEntry.getNameUTF8Entry().getUTF8() + nameAndTypeEntry.getDescriptorUTF8Entry().getUTF8();
+      return nameAndTypeEntry.getNameUTF8Entry().getUTF8().replace('/', '.') + nameAndTypeEntry.getDescriptorUTF8Entry().getUTF8().replace('/', '.'); // name + descriptor, with '/' separators rewritten to '.'
    }
 
    private static final ValueCache<Class<?>, Map<String, String>, RuntimeException> mappedMethodNamesCache = cacheProperty(new ValueComputer<Class<?>, Map<String, String>>() {
diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
index 66f8e72b1bf269553a99494bb5043faeda228ff4..04de29c21247e8c9f659b3c13a8b4edad2f2066b 100644
--- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
+++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
@@ -70,6 +70,7 @@ import java.nio.*;
 import java.util.*;
 import java.util.concurrent.*;
 import java.util.concurrent.ForkJoinPool.*;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.*;
 
 /**
@@ -943,12 +944,149 @@ public class KernelRunner extends KernelRunnerJNI{
    private void restoreObjects() throws AparapiException {
       for (int i = 0; i < argc; i++) {
          final KernelArg arg = args[i];
-         if ((arg.getType() & ARG_OBJ_ARRAY_STRUCT) != 0) {
+         if (arg.getField().getType() == AtomicInteger[].class) { // AtomicInteger[] fields are copied back through their own int buffer
+            extractAtomicIntegerConversionBuffer(arg); 
+         } else if ((arg.getType() & ARG_OBJ_ARRAY_STRUCT) != 0) {
             extractOopConversionBuffer(arg);
          }
       }
    }
 
+   private boolean prepareAtomicIntegerConversionBuffer(KernelArg arg) throws AparapiException { // packs an AtomicInteger[] field into an int byte buffer; returns true when the buffer was (re)allocated
+      usesOopConversion = true;
+      final Class<?> arrayClass = arg.getField().getType();
+      ClassModel c = null;
+      boolean didReallocate = false;
+
+      if (arg.getObjArrayElementModel() == null) {
+         final String tmp = arrayClass.getName().substring(2).replace('/', '.'); // drop the first two characters of the array class name
+         final String arrayClassInDotForm = tmp.substring(0, tmp.length() - 1); // drop the last character
+
+         if (logger.isLoggable(Level.FINE)) {
+            logger.fine("looking for type = " + arrayClassInDotForm);
+         }
+
+         // look up the element ClassModel in entryPoint.getObjectArrayFieldsClasses() and cache it on the arg
+         c = entryPoint.getObjectArrayFieldsClasses().get(arrayClassInDotForm);
+         arg.setObjArrayElementModel(c);
+      } else {
+         c = arg.getObjArrayElementModel();
+      }
+      assert c != null : "should find class for elements " + arrayClass.getName();
+
+      if (logger.isLoggable(Level.FINEST)) {
+         logger.finest("Syncing obj array type = " + arrayClass + " cvtd= " + c.getClassWeAreModelling().getName());
+      }
+
+      int objArraySize = 0;
+      Object newRef = null;
+      try {
+         newRef = arg.getField().get(kernel);
+         objArraySize = Array.getLength(newRef);
+      } catch (final IllegalAccessException e) {
+         throw new AparapiException(e);
+      }
+
+      assert (newRef != null) && (objArraySize != 0) : "no data";
+
+      final int totalStructSize = Integer.BYTES; // each element is transferred as one int
+      final int totalBufferSize = objArraySize * totalStructSize;
+
+      // allocate a new ByteBuffer the first time through, or when the field now refers to a different array
+      if ((arg.getObjArrayBuffer() == null) || (newRef != arg.getArray())) {
+         final ByteBuffer structBuffer = ByteBuffer.allocate(totalBufferSize);
+         arg.setObjArrayByteBuffer(structBuffer.order(ByteOrder.LITTLE_ENDIAN));
+         arg.setObjArrayBuffer(arg.getObjArrayByteBuffer().array());
+         didReallocate = true;
+         if (logger.isLoggable(Level.FINEST)) {
+            logger.finest("objArraySize = " + objArraySize + " totalStructSize= " + totalStructSize + " totalBufferSize="
+                  + totalBufferSize);
+         }
+      } else {
+         arg.getObjArrayByteBuffer().clear(); // reuse the existing buffer, writing again from position 0
+      }
+
+      AtomicInteger[] atomic = (AtomicInteger[])newRef;
+      
+      // publish the backing byte[] and its dimensions on the arg (presumably what the JNI side reads - confirm)
+      arg.setJavaArray(arg.getObjArrayBuffer());
+      arg.setNumElements(objArraySize);
+      arg.setSizeInBytes(totalBufferSize);
+
+      int sizeWritten = 0;
+      for (int j = 0; j < objArraySize; j++) {
+         arg.getObjArrayByteBuffer().putInt(atomic[j].get());
+         sizeWritten += Integer.BYTES;
+
+         // no per-element padding is written: one int fills the whole totalStructSize slot
+         if (logger.isLoggable(Level.FINEST)) {
+            logger.finest("sizeWritten = " + sizeWritten + " totalStructSize= " + totalStructSize);
+         }
+      }
+      assert sizeWritten <= totalBufferSize : "wrote too much into buffer";
+
+      while (sizeWritten < totalBufferSize) { // never entered: the loop above already wrote objArraySize * Integer.BYTES bytes
+         if (logger.isLoggable(Level.FINEST)) {
+            logger.finest(arg.getName() + " struct pad byte = " + sizeWritten + " totalStructSize= " + totalStructSize);
+          }
+          arg.getObjArrayByteBuffer().put((byte) -1);
+          sizeWritten++;
+      }
+
+      assert arg.getObjArrayByteBuffer().arrayOffset() == 0 : "should be zero";
+
+      return didReallocate;	
+   }
+
+   private void extractAtomicIntegerConversionBuffer(KernelArg arg) throws AparapiException { // copies the ints in the arg's byte buffer back into the AtomicInteger[] elements
+      final Class<?> arrayClass = arg.getField().getType();
+      final ClassModel c = arg.getObjArrayElementModel();
+      assert c != null : "should find class for elements: " + arrayClass.getName();
+      assert arg.getArray() != null : "array is null";
+
+      if (logger.isLoggable(Level.FINEST)) {
+         logger.finest("Syncing field:" + arg.getName() + ", bb=" + arg.getObjArrayByteBuffer() + ", type = " + arrayClass);
+      }
+
+      int objArraySize = 0;
+      try {
+         objArraySize = Array.getLength(arg.getField().get(kernel));
+      } catch (final IllegalAccessException e) {
+         throw new AparapiException(e);
+      }
+
+      assert objArraySize > 0 : "should be > 0";
+
+      final int totalStructSize = Integer.BYTES; // each element occupies one int in the buffer
+      final int totalBufferSize = objArraySize * totalStructSize;
+      // NOTE(review): the buffer length is not checked against totalBufferSize here
+
+      arg.getObjArrayByteBuffer().rewind(); // read from the start of the buffer
+      
+      AtomicInteger[] atomics = (AtomicInteger[])arg.getArray();
+
+      int sizeWritten = 0; // despite the name, counts bytes READ from the buffer in this method
+      for (int j = 0; j < objArraySize; j++) {
+         // read the next int from the buffer and store it into the matching array element
+         final int x = arg.getObjArrayByteBuffer().getInt();
+         atomics[j].set(x);
+         
+         sizeWritten += Integer.BYTES;
+
+         // no per-element padding to skip: one int fills the whole totalStructSize slot
+         if (logger.isLoggable(Level.FINEST)) {
+            logger.finest("sizeWritten = " + sizeWritten + " totalStructSize= " + totalStructSize);
+         }
+      }
+      assert sizeWritten <= totalBufferSize : "wrote too much into buffer";
+
+      while (sizeWritten < totalBufferSize) { // never entered: the loop above already consumed objArraySize * Integer.BYTES bytes
+         // skip over pad bytes
+         arg.getObjArrayByteBuffer().get();
+         sizeWritten++;
+      }
+   }
+
    private boolean updateKernelArrayRefs() throws AparapiException {
       boolean needsSync = false;
 
@@ -976,7 +1114,9 @@ public class KernelRunner extends KernelRunnerJNI{
                   }
                }
 
-               if ((arg.getType() & ARG_OBJ_ARRAY_STRUCT) != 0) {
+               if (arg.getField().getType() == AtomicInteger[].class) {
+            	  prepareAtomicIntegerConversionBuffer(arg);
+               } else if ((arg.getType() & ARG_OBJ_ARRAY_STRUCT) != 0) {
                   prepareOopConversionBuffer(arg);
                } else {
                   // set up JNI fields for normal arrays
diff --git a/src/main/java/com/aparapi/internal/model/ClassModel.java b/src/main/java/com/aparapi/internal/model/ClassModel.java
index 73415a267cf3184f68c54c1eefa696b254913be7..fe80b75d5d173677f6fbc381f54fcb45efa2fa85 100644
--- a/src/main/java/com/aparapi/internal/model/ClassModel.java
+++ b/src/main/java/com/aparapi/internal/model/ClassModel.java
@@ -2769,7 +2769,12 @@ public class ClassModel {
    public void parse(Class<?> _class) throws ClassParseException {
 
       clazz = _class;
-      parse(_class.getClassLoader(), _class.getName());
+      // getClassLoader() may return null for bootstrap classes (AtomicInteger, Math - see issue #6), so substitute a usable loader.
+      ClassLoader loader = _class.getClassLoader();
+      if (loader == null) {
+    	  loader = ClassLoader.getSystemClassLoader(); // delegates to its parents; getParent() may itself return null
+      }
+      parse(loader, _class.getName());
    }
 
    /**
diff --git a/src/main/java/com/aparapi/internal/model/Entrypoint.java b/src/main/java/com/aparapi/internal/model/Entrypoint.java
index 3a291b2431855b979e9e625507869bf6fa9e80d4..4cdc569fdfadc1ff0f7b476484ae487e9517f2e2 100644
--- a/src/main/java/com/aparapi/internal/model/Entrypoint.java
+++ b/src/main/java/com/aparapi/internal/model/Entrypoint.java
@@ -63,6 +63,7 @@ import com.aparapi.internal.util.*;
 
 import java.lang.reflect.*;
 import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.*;
 
 public class Entrypoint implements Cloneable {
@@ -174,7 +175,7 @@ public class Entrypoint implements Cloneable {
       try {
          field = _clazz.getDeclaredField(_name);
          final Class<?> type = field.getType();
-         if (type.isPrimitive() || type.isArray()) {
+         if (type.isPrimitive() || type.isArray() || type.equals(AtomicInteger.class)) {
             return field;
          }
          if (field.getAnnotation(Kernel.NoCL.class) != null) {
@@ -608,7 +609,7 @@ public class Entrypoint implements Cloneable {
                   if (arrayFieldModel != null) {
                      final Class<?> memberClass = arrayFieldModel.getClassWeAreModelling();
                      final int modifiers = memberClass.getModifiers();
-                     if (!Modifier.isFinal(modifiers)) {
+                     if (memberClass != AtomicInteger.class && !Modifier.isFinal(modifiers)) {
                         throw new ClassParseException(ClassParseException.TYPE.ACCESSEDOBJECTNONFINAL);
                      }
 
diff --git a/src/main/java/com/aparapi/internal/writer/KernelWriter.java b/src/main/java/com/aparapi/internal/writer/KernelWriter.java
index 31d6629db3f3567225741c44f26e94bab90d9520..25c89cbe979104e5f27f44c30d1cd8cfbfe1a2e7 100644
--- a/src/main/java/com/aparapi/internal/writer/KernelWriter.java
+++ b/src/main/java/com/aparapi/internal/writer/KernelWriter.java
@@ -64,6 +64,7 @@ import com.aparapi.internal.model.ClassModel.AttributePool.RuntimeAnnotationsEnt
 import com.aparapi.internal.model.ClassModel.AttributePool.RuntimeParameterAnnotationsEntry.ParameterInfo;
 import com.aparapi.internal.model.ClassModel.*;
 import com.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry.Arg;
 
 import java.util.*;
 
@@ -193,6 +194,9 @@ public abstract class KernelWriter extends BlockWriter{
          return isLocal ? (cvtLongArrayToLong) : (cvtLongArrayToLongStar);
       } else if (_typeDesc.equals("[S") || _typeDesc.equals("short[]")) {
          return isLocal ? (cvtShortArrayToShort) : (cvtShortArrayToShortStar);
+      } else if ("[Ljava/util/concurrent/atomic/AtomicInteger;".equals(_typeDesc) ||
+    		  "[Ljava.util.concurrent.atomic.AtomicInteger;".equals(_typeDesc)) {
+    	 return (cvtIntArrayToIntStar);
       }
       // if we get this far, we haven't matched anything yet
       if (useClassModel) {
@@ -291,6 +295,12 @@ public abstract class KernelWriter extends BlockWriter{
             if (((intrinsicMapping == null) && (_methodCall instanceof VirtualMethodCall) && (!isIntrinsic)) || (arg != 0)) {
                write(", ");
             }
+
+            Arg methodArg = _methodEntry.getArgs()[arg];
+            if (!methodArg.isArray() && "Ljava/util/concurrent/atomic/AtomicInteger;".equals(methodArg.getType())) {
+            	write("&");
+            }
+ 
             writeInstruction(_methodCall.getArg(arg));
          }
          write(")");
@@ -383,7 +393,10 @@ public abstract class KernelWriter extends BlockWriter{
 
          // If it is a converted array of objects, emit the struct param
          String className = null;
-         if (signature.startsWith("L")) {
+	 if (signature.equals("Ljava/util/concurrent/atomic/AtomicInteger;")) {
+            argLine.append("int");
+            thisStructLine.append("int");
+         } else if (signature.startsWith("L")) {
             // Turn Lcom/codegen/javalabs/opencl/demo/DummyOOA; into com_amd_javalabs_opencl_demo_DummyOOA for example
             className = (signature.substring(1, signature.length() - 1)).replace('/', '_');
             // if (logger.isLoggable(Level.FINE)) {
@@ -497,6 +510,12 @@ public abstract class KernelWriter extends BlockWriter{
       }
 
       if (usesAtomics) {
+         write("#define atomicGet(p) (*p)");
+         newLine();
+         
+         write("#define atomicSet(p, val) (*p=val)");
+         newLine();
+
          write("int atomicAdd(__global int *_arr, int _index, int _delta){");
          in();
          {
diff --git a/src/test/java/com/aparapi/codegen/test/Atomic32PragmaTest.java b/src/test/java/com/aparapi/codegen/test/Atomic32PragmaTest.java
index 070d39a11a73c6dd145f43cbf271301f55f65f07..714943b76b6c88761c20f758ac6352e4d41c4f13 100644
--- a/src/test/java/com/aparapi/codegen/test/Atomic32PragmaTest.java
+++ b/src/test/java/com/aparapi/codegen/test/Atomic32PragmaTest.java
@@ -23,6 +23,8 @@ public class Atomic32PragmaTest extends com.aparapi.codegen.CodeGenJUnitBase {
 " #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +
 " #pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +
 " #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +
+" #define atomicGet(p) (*p)\n" + 
+" #define atomicSet(p, val) (*p=val)\n" +
 " int atomicAdd(__global int *_arr, int _index, int _delta){\n" +
 " return atomic_add(&_arr[_index], _delta);\n" +
 " }\n" +
diff --git a/src/test/java/com/aparapi/runtime/Issue81AtomicsSupportTest.java b/src/test/java/com/aparapi/runtime/Issue81AtomicsSupportTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..830e0f45ce0f86eed70243175ecb7a7f1c8c1f05
--- /dev/null
+++ b/src/test/java/com/aparapi/runtime/Issue81AtomicsSupportTest.java
@@ -0,0 +1,1513 @@
+/**
+ * Copyright (c) 2016 - 2017 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.aparapi.runtime;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import com.aparapi.Kernel;
+import com.aparapi.Range;
+import com.aparapi.device.Device;
+import com.aparapi.device.JavaDevice;
+import com.aparapi.device.OpenCLDevice;
+import com.aparapi.internal.kernel.KernelManager;
+
+public class Issue81AtomicsSupportTest {
+
+    private static OpenCLDevice openCLDevice = null;
+
+    private static final int SIZE = 100;
+	private final static int LOCK_IDX = 3;
+	private final static int MAX_VAL_IDX = 0;
+	private final static int MAX_POS_LEFT_IDX = 1;
+	private final static int MAX_POS_RIGHT_IDX = 2;
+
+    private static class CLKernelManager extends KernelManager { // static: installed globally via setKernelManager, must not pin the test instance
+    	@Override
+    	protected List<Device.TYPE> getPreferredDeviceTypes() {
+    		return Arrays.asList(Device.TYPE.ACC, Device.TYPE.GPU, Device.TYPE.CPU); // listed as ACC, GPU, CPU
+    	}
+    }
+    
+    private static class JTPKernelManager extends KernelManager { // static: installed globally via setKernelManager, must not pin the test instance
+    	private JTPKernelManager() {
+    		LinkedHashSet<Device> preferredDevices = new LinkedHashSet<Device>(1);
+    		preferredDevices.add(JavaDevice.THREAD_POOL); // the Java thread-pool device is the only preferred device
+    		setDefaultPreferredDevices(preferredDevices);
+    	}
+    	@Override
+    	protected List<Device.TYPE> getPreferredDeviceTypes() {
+    		return Arrays.asList(Device.TYPE.JTP);
+    	}
+    }
+    
+    @Before
+    public void setUpBeforeClass() throws Exception { // NOTE(review): runs before EVERY test, so the assumption below gates the JTP tests too
+    	KernelManager.setKernelManager(new CLKernelManager());
+        Device device = KernelManager.instance().bestDevice();
+        assumeTrue (device instanceof OpenCLDevice); // instanceof is already false for null; no separate null check needed
+        openCLDevice = (OpenCLDevice) device;
+    }
+
+    @Test
+    public void issue81OpenCLExplicit() {
+    	final int in[] = new int[SIZE];
+    	
+    	final int[] out = new int[3];
+    	for (int i = 0; i < SIZE/2; i++) {
+    		in[i] = i;
+    		in[i + SIZE/2] = SIZE - i;
+    	}
+    	in[10] = SIZE;
+    	
+        final AtomicKernel kernel = new AtomicKernel(in, out);
+        try {
+	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        kernel.setExplicit(true);
+	        kernel.put(in);
+	        kernel.execute(range);
+	        kernel.get(out);
+        } finally {
+        	kernel.dispose();
+        }
+
+        assertEquals("Max value doesn't match", 100, out[0]);
+        assertTrue("Left max found at unexpected position: " + out[MAX_POS_LEFT_IDX], out[MAX_POS_LEFT_IDX] == 10 || out[MAX_POS_LEFT_IDX] == 50);
+        assertTrue("Right max found at unexpected position: " + out[MAX_POS_RIGHT_IDX], out[MAX_POS_RIGHT_IDX] == 100-10 || out[MAX_POS_RIGHT_IDX] == 100-50);
+    }
+    
+    @Test
+    public void issue81OpenCL() {
+    	final int in[] = new int[SIZE];
+    	
+    	final int[] out = new int[3];
+    	for (int i = 0; i < SIZE/2; i++) {
+    		in[i] = i;
+    		in[i + SIZE/2] = SIZE - i;
+    	}
+    	in[10] = SIZE;
+    	
+        final AtomicKernel kernel = new AtomicKernel(in, out);
+        try {
+	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        kernel.execute(range);
+        } finally {
+        	kernel.dispose();
+        }
+
+        assertEquals("Max value doesn't match", 100, out[0]);
+        assertTrue("Left max found at unexpected position: " + out[MAX_POS_LEFT_IDX], out[MAX_POS_LEFT_IDX] == 10 || out[MAX_POS_LEFT_IDX] == 50);
+        assertTrue("Right max found at unexpected position: " + out[MAX_POS_RIGHT_IDX], out[MAX_POS_RIGHT_IDX] == 100-10 || out[MAX_POS_RIGHT_IDX] == 100-50);
+    }
+    
+    @Test
+    public void issue81JTP() {
+    	KernelManager.setKernelManager(new JTPKernelManager());
+    	Device device = KernelManager.instance().bestDevice();
+    	final int in[] = new int[SIZE];
+    	
+    	final int[] out = new int[3];
+    	for (int i = 0; i < SIZE/2; i++) {
+    		in[i] = i;
+    		in[i + SIZE/2] = SIZE - i;
+    	}
+    	in[10] = SIZE;
+    	
+        final AtomicKernel kernel = new AtomicKernel(in, out);
+        try {
+	        final Range range = device.createRange(SIZE/2, SIZE/2);
+	        kernel.execute(range);
+        } finally {
+        	kernel.dispose();
+        }
+        assertEquals("Max value doesn't match", 100, out[0]);
+        assertTrue("Left max found at unexpected position: " + out[MAX_POS_LEFT_IDX], out[MAX_POS_LEFT_IDX] == 10 || out[MAX_POS_LEFT_IDX] == 50);
+        assertTrue("Right max found at unexpected position: " + out[MAX_POS_RIGHT_IDX], out[MAX_POS_RIGHT_IDX] == 100-10 || out[MAX_POS_RIGHT_IDX] == 100-50);
+    }
+    
+    @Test
+    public void issue81BOpenCL() {
+    	final int in[] = new int[SIZE];
+    	final AtomicInteger[] out = new AtomicInteger[3];
+    	for (int i = 0; i < out.length; i++) {
+    		out[i] = new AtomicInteger(0);
+    	}
+    	for (int i = 0; i < SIZE/2; i++) {
+    		in[i] = i;
+    		in[i + SIZE/2] = SIZE - i;
+    	}
+    	in[10] = SIZE;
+    	
+        final AtomicBKernel kernel = new AtomicBKernel(in, out);
+        try {
+	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        kernel.execute(range);
+        } finally {
+        	kernel.dispose();
+        }
+
+        assertEquals("Max value doesn't match", 100, out[0].get());
+        assertTrue("Left max found at unexpected position: " + out[MAX_POS_LEFT_IDX], out[MAX_POS_LEFT_IDX].get() == 10 || out[MAX_POS_LEFT_IDX].get() == 50);
+        assertTrue("Right max found at unexpected position: " + out[MAX_POS_RIGHT_IDX], out[MAX_POS_RIGHT_IDX].get() == 100-10 || out[MAX_POS_RIGHT_IDX].get() == 100-50);
+    }
+        
+    @Test
+    public void issue81BJTP() {
+    	KernelManager.setKernelManager(new JTPKernelManager());
+    	Device device = KernelManager.instance().bestDevice();
+    	final int in[] = new int[SIZE];
+    	final AtomicInteger[] out = new AtomicInteger[3];
+    	for (int i = 0; i < out.length; i++) {
+    		out[i] = new AtomicInteger(0);
+    	}
+    	for (int i = 0; i < SIZE/2; i++) {
+    		in[i] = i;
+    		in[i + SIZE/2] = SIZE - i;
+    	}
+    	in[10] = SIZE;
+    	
+        final AtomicBKernel kernel = new AtomicBKernel(in, out);
+        try {
+	        final Range range = device.createRange(SIZE/2, SIZE/2);
+	        kernel.execute(range);
+        } finally {
+        	kernel.dispose();
+        }
+        assertEquals("Max value doesn't match", 100, out[0].get());
+        assertTrue("Left max found at unexpected position: " + out[MAX_POS_LEFT_IDX], out[MAX_POS_LEFT_IDX].get() == 10 || out[MAX_POS_LEFT_IDX].get() == 50);
+        assertTrue("Right max found at unexpected position: " + out[MAX_POS_RIGHT_IDX], out[MAX_POS_RIGHT_IDX].get() == 100-10 || out[MAX_POS_RIGHT_IDX].get() == 100-50);
+    }    
+
+    
+    @Test
+    public void issue81AtomicAddOpenCLExplicit() {
+    	Device openCLDevice = KernelManager.instance().bestDevice();
+    	final int in[] = new int[2];
+    	final int[] out = new int[2];
+    	in[0] = 10;
+    	in[1] = 20;
+    	
+    	final AtomicAdd kernel = new AtomicAdd(in, out);
+    	try {
+	    	final Range range = openCLDevice.createRange(1,1);
+	        kernel.setExplicit(true);
+	        kernel.put(in);
+	        kernel.execute(range);
+	        kernel.get(out);
+    	} finally {
+    		kernel.dispose();
+    	}
+    	assertEquals("Old value doesn't match", in[0], out[0]);
+    	assertEquals("Final value doesn't match", in[0] + in[1], out[1]);
+    }
+
+    @Test
+    public void issue81AtomicAddOpenCL() {
+    	Device openCLDevice = KernelManager.instance().bestDevice();
+    	final int in[] = new int[2];
+    	final int[] out = new int[2];
+    	in[0] = 10;
+    	in[1] = 20;
+    	
+    	final AtomicAdd kernel = new AtomicAdd(in, out);
+    	try {
+	    	final Range range = openCLDevice.createRange(1,1);
+	        kernel.execute(range);
+    	} finally {
+    		kernel.dispose();
+    	}
+    	assertEquals("Old value doesn't match", in[0], out[0]);
+    	assertEquals("Final value doesn't match", in[0] + in[1], out[1]);
+    }
+
+    @Test
+    public void issue81AtomicAddJTP() {
+    	KernelManager.setKernelManager(new JTPKernelManager());
+    	Device device = KernelManager.instance().bestDevice();
+    	final int in[] = new int[2];
+    	final int[] out = new int[2];
+    	in[0] = 10;
+    	in[1] = 20;
+    	
+    	final AtomicAdd kernel = new AtomicAdd(in, out);
+    	try {
+	    	final Range range = device.createRange(1,1);
+	    	kernel.execute(range);
+    	} finally {
+    		kernel.dispose();
+    	}
+    	assertEquals("Old value doesn't match", in[0], out[0]);
+    	assertEquals("Final value doesn't match", in[0] + in[1], out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicAdd.
+     * Validates that an addition is actually performed and that the old value is returned.
+     * @author lpnm
+     *
+     */
+    private static final class AtomicAdd extends Kernel {
+    	private int in[];
+    	private int out[];
+    	
+    	public AtomicAdd(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0);
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic with the first input
+			out[0] = atomicAdd(atomicValues[0], in[1]); // records the value returned by atomicAdd
+			out[1] = atomicGet(atomicValues[0]); // records the value held after the addition
+		}
+    }
+
+    @Test
+    public void issue81AtomicSubOpenCLExplicit() {
+    	Device openCLDevice = KernelManager.instance().bestDevice();
+    	final int in[] = new int[2];
+    	final int[] out = new int[2];
+    	in[0] = 10;
+    	in[1] = 20;
+    	
+    	final AtomicSub kernel = new AtomicSub(in, out);
+    	try {
+	    	final Range range = openCLDevice.createRange(1,1);
+	        kernel.setExplicit(true);
+	        kernel.put(in);
+	        kernel.execute(range);
+	        kernel.get(out);
+    	} finally {
+    		kernel.dispose();
+    	}
+    	assertEquals("Old value doesn't match", in[0], out[0]);
+    	assertEquals("Final value doesn't match", in[0] - in[1], out[1]);
+    }
+
+    @Test
+    public void issue81AtomicSubOpenCL() {
+    	Device openCLDevice = KernelManager.instance().bestDevice();
+    	final int in[] = new int[2];
+    	final int[] out = new int[2];
+    	in[0] = 10;
+    	in[1] = 20;
+    	
+    	final AtomicSub kernel = new AtomicSub(in, out);
+    	try {
+	    	final Range range = openCLDevice.createRange(1,1);
+	        kernel.execute(range);
+    	} finally {
+    		kernel.dispose();
+    	}
+    	assertEquals("Old value doesn't match", in[0], out[0]);
+    	assertEquals("Final value doesn't match", in[0] - in[1], out[1]);
+    }
+
+    /** atomicSub after installing a JTPKernelManager: {10, 20} must yield {10, -10}. */
+    @Test
+    public void issue81AtomicSubJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {10, 20};
+        final int[] out = new int[2];
+
+        final AtomicSub kernel = new AtomicSub(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] - in[1], out[1]);
+    }
+    
+    /**
+     * Kernel for single-threaded validation of atomicSub: run() seeds an atomic with in[0],
+     * applies atomicSub with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicSub extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: amount passed to atomicSub
+    	private int out[]; // out[0]: value returned by atomicSub, out[1]: value read back afterwards
+    	
+    	public AtomicSub(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicSub(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);			
+		}
+
+    }
+
+    /** atomicXchg with explicit transfers (setExplicit/put/get): in = {10, 20} must yield out = {10, 20}. */
+    @Test
+    public void issue81AtomicXchgOpenCLExplicit() {
+        final int[] in = {10, 20};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicXchg kernel = new AtomicXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicXchg on KernelManager's best device, no explicit put/get: {10, 20} must yield {10, 20}. */
+    @Test
+    public void issue81AtomicXchgOpenCL() {
+        final int[] in = {10, 20};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicXchg kernel = new AtomicXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+    
+    /** atomicXchg after installing a JTPKernelManager: {10, 20} must yield {10, 20}. */
+    @Test
+    public void issue81AtomicXchgJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {10, 20};
+        final int[] out = new int[2];
+
+        final AtomicXchg kernel = new AtomicXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicXchg: run() seeds an atomic with in[0],
+     * exchanges in[1] into it, and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicXchg extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicXchg
+    	private int out[]; // out[0]: value returned by atomicXchg, out[1]: value read back afterwards
+    	
+    	public AtomicXchg(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicXchg(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+    
+    /** atomicInc with explicit transfers (setExplicit/put/get): in = {50} must yield out = {50, 51}. */
+    @Test
+    public void issue81AtomicIncOpenCLExplicit() {
+        final int[] in = {50};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicInc kernel = new AtomicInc(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] + 1, out[1]);
+    }
+
+    /** atomicInc on KernelManager's best device, no explicit put/get: {50} must yield {50, 51}. */
+    @Test
+    public void issue81AtomicIncOpenCL() {
+        final int[] in = {50};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicInc kernel = new AtomicInc(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] + 1, out[1]);
+    }
+    
+    /** atomicInc via JTPKernelManager: {50} must yield {50, 51}. NOTE(review): name lacks the usual JTP suffix. */
+    @Test
+    public void issue81AtomicInc() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50};
+        final int[] out = new int[2];
+
+        final AtomicInc kernel = new AtomicInc(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] + 1, out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicInc: run() seeds an atomic with in[0],
+     * applies atomicInc to it, and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicInc extends Kernel {
+    	private int in[]; // in[0]: initial value
+    	private int out[]; // out[0]: value returned by atomicInc, out[1]: value read back afterwards
+    	
+    	public AtomicInc(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicInc(atomicValues[0]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /** atomicDec with explicit transfers (setExplicit/put/get): in = {50} must yield out = {50, 49}. */
+    @Test
+    public void issue81AtomicDecOpenCLExplicit() {
+        final int[] in = {50};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicDec kernel = new AtomicDec(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] - 1, out[1]);
+    }
+
+    /** atomicDec on KernelManager's best device, no explicit put/get: {50} must yield {50, 49}. */
+    @Test
+    public void issue81AtomicDecOpenCL() {
+        final int[] in = {50};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicDec kernel = new AtomicDec(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] - 1, out[1]);
+    }
+
+    /** atomicDec after installing a JTPKernelManager: {50} must yield {50, 49}. */
+    @Test
+    public void issue81AtomicDecJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50};
+        final int[] out = new int[2];
+
+        final AtomicDec kernel = new AtomicDec(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0] - 1, out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicDec: run() seeds an atomic with in[0],
+     * applies atomicDec to it, and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicDec extends Kernel {
+    	private int in[]; // in[0]: initial value
+    	private int out[]; // out[0]: value returned by atomicDec, out[1]: value read back afterwards
+    	
+    	public AtomicDec(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicDec(atomicValues[0]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /**
+     * atomicCmpXchg, matching case, explicit transfers: in = {50, 50, 100} must yield out = {50, 100}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg1OpenCLExplicit() {
+        final int[] in = {50, 50, 100};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[2], out[1]);
+    }
+
+    /**
+     * atomicCmpXchg, matching case, no explicit put/get: in = {50, 50, 100} must yield out = {50, 100}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg1OpenCL() {
+        final int[] in = {50, 50, 100};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[2], out[1]);
+    }
+
+    /**
+     * atomicCmpXchg, matching case, via JTPKernelManager: in = {50, 50, 100} must yield out = {50, 100}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg1JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 50, 100};
+        final int[] out = new int[2];
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[2], out[1]);
+    }
+
+    /**
+     * atomicCmpXchg, non-matching case, explicit transfers: in = {50, 51, 100} must yield out = {50, 50}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg2OpenCLExplicit() {
+        final int[] in = {50, 51, 100};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /**
+     * atomicCmpXchg, non-matching case, no explicit put/get: in = {50, 51, 100} must yield out = {50, 50}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg2OpenCL() {
+        final int[] in = {50, 51, 100};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /**
+     * atomicCmpXchg, non-matching case, via JTPKernelManager: in = {50, 51, 100} must yield out = {50, 50}.
+     */
+    @Test
+    public void issue81AtomicCmpXchg2JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 51, 100};
+        final int[] out = new int[2];
+
+        final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicCmpXchg: run() seeds an atomic with in[0],
+     * calls atomicCmpXchg with in[1] and in[2], and stores the return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicCmpXchg extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1] and in[2]: second and third atomicCmpXchg arguments
+    	private int out[]; // out[0]: value returned by atomicCmpXchg, out[1]: value read back afterwards
+    	
+    	public AtomicCmpXchg(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicCmpXchg(atomicValues[0], in[1], in[2]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /** atomicMin with a smaller operand, explicit transfers: in = {50, 49} must yield out = {50, 49}. */
+    @Test
+    public void issue81AtomicMin1OpenCLExplicit() {
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicMin with a smaller operand, no explicit put/get: in = {50, 49} must yield out = {50, 49}. */
+    @Test
+    public void issue81AtomicMin1OpenCL() {
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicMin with a smaller operand, via JTPKernelManager: in = {50, 49} must yield out = {50, 49}. */
+    @Test
+    public void issue81AtomicMin1JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicMin with a larger operand, explicit transfers: in = {50, 51} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMin2OpenCLExplicit() {
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /** atomicMin with a larger operand, no explicit put/get: in = {50, 51} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMin2OpenCL() {
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /** atomicMin with a larger operand, via JTPKernelManager: in = {50, 51} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMin2JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+
+        final AtomicMin kernel = new AtomicMin(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicMin: run() seeds an atomic with in[0],
+     * applies atomicMin with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicMin extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicMin
+    	private int out[]; // out[0]: value returned by atomicMin, out[1]: value read back afterwards
+    	
+    	public AtomicMin(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicMin(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);			
+		}
+
+    }
+
+    /** atomicMax with a larger operand, explicit transfers: in = {50, 51} must yield out = {50, 51}. */
+    @Test
+    public void issue81AtomicMax1OpenCLExplicit() {
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicMax with a larger operand, no explicit put/get: in = {50, 51} must yield out = {50, 51}. */
+    @Test
+    public void issue81AtomicMax1OpenCL() {
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+    
+    /** atomicMax with a larger operand, via JTPKernelManager: in = {50, 51} must yield out = {50, 51}. */
+    @Test
+    public void issue81AtomicMax1JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 51};
+        final int[] out = new int[2];
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[1], out[1]);
+    }
+
+    /** atomicMax with a smaller operand, explicit transfers: in = {50, 49} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMax2OpenCLExplicit() {
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /** atomicMax with a smaller operand, no explicit put/get: in = {50, 49} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMax2OpenCL() {
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /** atomicMax with a smaller operand, via JTPKernelManager: in = {50, 49} must yield out = {50, 50}. */
+    @Test
+    public void issue81AtomicMax2JTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {50, 49};
+        final int[] out = new int[2];
+
+        final AtomicMax kernel = new AtomicMax(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", in[0], out[1]);
+    }
+
+    /**
+     * Kernel for single-threaded validation of atomicMax: run() seeds an atomic with in[0],
+     * applies atomicMax with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicMax extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicMax
+    	private int out[]; // out[0]: value returned by atomicMax, out[1]: value read back afterwards
+    	
+    	public AtomicMax(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicMax(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /** atomicAnd with explicit transfers (setExplicit/put/get): in = {0xf1, 0x8f} must yield out = {0xf1, 0x81}. */
+    @Test
+    public void issue81AtomicAndOpenCLExplicit() {
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicAnd kernel = new AtomicAnd(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x81, out[1]);
+    }
+
+    /** atomicAnd on KernelManager's best device, no explicit put/get: {0xf1, 0x8f} must yield {0xf1, 0x81}. */
+    @Test
+    public void issue81AtomicAndOpenCL() {
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicAnd kernel = new AtomicAnd(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x81, out[1]);
+    }
+    
+    /** atomicAnd after installing a JTPKernelManager: {0xf1, 0x8f} must yield {0xf1, 0x81}. */
+    @Test
+    public void issue81AtomicAndJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+
+        final AtomicAnd kernel = new AtomicAnd(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x81, out[1]);
+    }
+    
+    /**
+     * Kernel for single-threaded validation of atomicAnd: run() seeds an atomic with in[0],
+     * applies atomicAnd with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicAnd extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicAnd
+    	private int out[]; // out[0]: value returned by atomicAnd, out[1]: value read back afterwards
+    	
+    	public AtomicAnd(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicAnd(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /** atomicOr with explicit transfers (setExplicit/put/get): in = {0x80, 0x02} must yield out = {0x80, 0x82}. */
+    @Test
+    public void issue81AtomicOrOpenCLExplicit() {
+        final int[] in = {0x80, 0x02};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicOr kernel = new AtomicOr(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x82, out[1]);
+    }
+
+    /** atomicOr on KernelManager's best device, no explicit put/get: {0x80, 0x02} must yield {0x80, 0x82}. */
+    @Test
+    public void issue81AtomicOrOpenCL() {
+        final int[] in = {0x80, 0x02};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicOr kernel = new AtomicOr(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x82, out[1]);
+    }
+
+    /** atomicOr after installing a JTPKernelManager: {0x80, 0x02} must yield {0x80, 0x82}. */
+    @Test
+    public void issue81AtomicOrJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {0x80, 0x02};
+        final int[] out = new int[2];
+
+        final AtomicOr kernel = new AtomicOr(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x82, out[1]);
+    }
+    
+    /**
+     * Kernel for single-threaded validation of atomicOr: run() seeds an atomic with in[0],
+     * applies atomicOr with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicOr extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicOr
+    	private int out[]; // out[0]: value returned by atomicOr, out[1]: value read back afterwards
+    	
+    	public AtomicOr(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicOr(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+
+    }
+
+    /** atomicXor with explicit transfers (setExplicit/put/get): in = {0xf1, 0x8f} must yield out = {0xf1, 0x7e}. */
+    @Test
+    public void issue81AtomicXorOpenCLExplicit() {
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicXor kernel = new AtomicXor(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.setExplicit(true);
+            kernel.put(in);
+            kernel.execute(oneItemRange);
+            kernel.get(out);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x7e, out[1]);
+    }
+
+    /** atomicXor on KernelManager's best device, no explicit put/get: {0xf1, 0x8f} must yield {0xf1, 0x7e}. */
+    @Test
+    public void issue81AtomicXorOpenCL() {
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+        final Device bestDevice = KernelManager.instance().bestDevice();
+
+        final AtomicXor kernel = new AtomicXor(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x7e, out[1]);
+    }
+    
+    /** atomicXor after installing a JTPKernelManager: {0xf1, 0x8f} must yield {0xf1, 0x7e}. */
+    @Test
+    public void issue81AtomicXorJTP() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        final Device bestDevice = KernelManager.instance().bestDevice();
+        final int[] in = {0xf1, 0x8f};
+        final int[] out = new int[2];
+
+        final AtomicXor kernel = new AtomicXor(in, out);
+        try {
+            final Range oneItemRange = bestDevice.createRange(1, 1);
+            kernel.execute(oneItemRange);
+        } finally {
+            kernel.dispose();
+        }
+        // out[0] is the value returned by the atomic call; out[1] is the value read back afterwards.
+        assertEquals("Old value doesn't match", in[0], out[0]);
+        assertEquals("Final value doesn't match", 0x7e, out[1]);
+    }
+    
+    /**
+     * Kernel for single-threaded validation of atomicXor: run() seeds an atomic with in[0],
+     * applies atomicXor with in[1], and stores the call's return value and the value read back.
+     *
+     * @author lpnm
+     */
+    private static final class AtomicXor extends Kernel {
+    	private int in[]; // in[0]: initial value, in[1]: value passed to atomicXor
+    	private int out[]; // out[0]: value returned by atomicXor, out[1]: value read back afterwards
+    	
+    	public AtomicXor(int[] in, int out[]) {
+    		this.in = in;
+    		this.out = out;
+    		atomicValues = new AtomicInteger[2];
+    		atomicValues[0] = new AtomicInteger(0);
+    		atomicValues[1] = new AtomicInteger(0); // allocated but never touched by run()
+    	}
+
+    	@Local
+    	private AtomicInteger atomicValues[];
+    	
+		@Override
+		public void run() {
+			atomicSet(atomicValues[0], in[0]); // seed the atomic before operating on it
+			out[0] = atomicXor(atomicValues[0], in[1]);
+			out[1] = atomicGet(atomicValues[0]);
+		}
+    }
+
+
+    
+    private static final class AtomicKernel extends Kernel {    	// finds the max of in[] and its position via the @Local atomics in maxs
+    	private int in[]; // input values; each work-item reads in[localId * 2] and in[localId * 2 + 1]
+    	private int out[]; // result slots, written only by the one work-item that wins the lock below
+    	// Atomic slots indexed by MAX_VAL_IDX, LOCK_IDX, MAX_POS_LEFT_IDX and MAX_POS_RIGHT_IDX.
+    	@Local
+    	private final AtomicInteger maxs[] = new AtomicInteger[4];
+    	    	
+    	public AtomicKernel(int[] in, int[] out) {
+    		this.in = in;
+    		this.out = out;
+    		for (int idx = 0; idx < 4; idx++) {
+    			maxs[idx] = new AtomicInteger(0);
+    		}
+    	}
+    	
+        @Override
+        public void run() {
+        	final int localId = getLocalId(0);
+        	
+        	//Reset the max value and the lock from inside the kernel. On OpenCL these slots may otherwise
+        	//contain random values; in Java the AtomicInteger constructor has already initialized them.
+        	//Since this is Aparapi, the reset must happen on both platforms; work-item 0 performs it.
+        	if (localId == 0) {
+	        	atomicSet(maxs[MAX_VAL_IDX], 0);
+	        	atomicSet(maxs[LOCK_IDX], 0);
+        	}
+        	//Ensure all threads start with the initialized atomic max value and lock.
+        	localBarrier();
+        	//Each work-item scans its own pair of inputs; localMaxVal starts at 0, so values below 0 never win.
+        	final int offset = localId * 2;
+    		int localMaxVal = 0;
+    		int localMaxPosFromLeft = 0;
+    		int localMaxPosFromRight = 0;
+    		for (int i = 0; i < 2; i++) {
+    			localMaxVal = max(in[offset + i], localMaxVal);
+    			if (localMaxVal == in[offset + i]) { // true when this element is the running max; on a tie the later index is kept
+    				localMaxPosFromLeft = offset + i;
+    				localMaxPosFromRight = SIZE - (offset + i);
+    			}
+    		}
+    		
+        	atomicMax(maxs[MAX_VAL_IDX], localMaxVal);
+    		//Ensure all threads have updated the atomic maxs[MAX_VAL_IDX]
+        	localBarrier();
+        	
+        	int maxValue = atomicGet(maxs[MAX_VAL_IDX]);
+        	if (maxValue == localMaxVal) {
+        		//Only the threads that hold the max value reach this point; the max value may, however,
+        		//occur at multiple indices of the input array.
+        		if (atomicXchg(maxs[LOCK_IDX], 0xff) == 0) {
+        			//Only the first thread to swap 0xff into the lock sees 0 and gets here, which keeps the
+        			//updates of the left and right max positions consistent with each other.
+        			atomicSet(maxs[MAX_POS_LEFT_IDX], localMaxPosFromLeft);
+        			atomicSet(maxs[MAX_POS_RIGHT_IDX], localMaxPosFromRight);
+        			out[MAX_VAL_IDX] = maxValue;
+        			out[MAX_POS_LEFT_IDX] = atomicGet(maxs[MAX_POS_LEFT_IDX]); // reads back the value stored three lines above
+        			out[MAX_POS_RIGHT_IDX] = localMaxPosFromRight;
+        		}
+        	}
+        }
+    }
+    // Max-search kernel over in[] whose results are stored with atomicSet into an AtomicInteger[] out (not @Local).
+    private static final class AtomicBKernel extends Kernel {    	
+    	private int in[];
+    	private AtomicInteger out[]; // written at MAX_VAL_IDX, MAX_POS_LEFT_IDX and MAX_POS_RIGHT_IDX via atomicSet
+    	
+    	@Local
+    	private final AtomicInteger maxs[] = new AtomicInteger[4]; // addressed via MAX_VAL_IDX, LOCK_IDX, MAX_POS_LEFT_IDX, MAX_POS_RIGHT_IDX (declared outside this class)
+    	    	
+    	public AtomicBKernel(int[] in, AtomicInteger[] out) {
+    		this.in = in;
+    		this.out = out;
+    		for (int idx = 0; idx < 4; idx++) {
+    			maxs[idx] = new AtomicInteger(0);
+    		}
+    	}
+    	
+        @Override
+        public void run() {
+        	final int localId = getLocalId(0);
+        	
+        	//Reset the shared max value and lock explicitly. Under OpenCL these slots may otherwise hold random
+        	//values; in Java the AtomicInteger constructor has already zeroed them, so the reset is redundant there.
+        	//Since this is Aparapi, the same code must behave correctly on both platforms; only local id 0 resets.
+        	if (localId == 0) {
+	        	atomicSet(maxs[MAX_VAL_IDX], 0);
+	        	atomicSet(maxs[LOCK_IDX], 0);
+        	}
+        	//Ensure all threads start with the initialized atomic max value and lock.
+        	localBarrier();
+        	
+        	final int offset = localId * 2; // each thread owns the pair in[offset], in[offset + 1]
+    		int localMaxVal = 0; // starts at 0, so values below 0 never become the maximum
+    		int localMaxPosFromLeft = 0;
+    		int localMaxPosFromRight = 0;
+    		for (int i = 0; i < 2; i++) {
+    			localMaxVal = max(in[offset + i], localMaxVal);
+    			if (localMaxVal == in[offset + i]) { // element i is the running maximum; on a tie the later index wins
+    				localMaxPosFromLeft = offset + i;
+    				localMaxPosFromRight = SIZE - (offset + i);
+    			}
+    		}
+    		
+        	atomicMax(maxs[MAX_VAL_IDX], localMaxVal);
+    		//Ensure all threads have updated the atomic maxs[MAX_VAL_IDX]
+        	localBarrier();
+        	
+        	int maxValue = atomicGet(maxs[MAX_VAL_IDX]);
+        	if (maxValue == localMaxVal) {
+        		//Only the threads holding the max value reach this point; however, the max value may
+        		//occur at multiple indices of the input array, so more than one thread can get here.
+        		if (atomicXchg(maxs[LOCK_IDX], 0xff) == 0) {
+        			//Only the thread that observed the lock as 0 gets here, thus ensuring a consistent update of
+        			//the left and right position slots and of out[]. The lock is never reset to 0 afterwards.
+        			atomicSet(maxs[MAX_POS_LEFT_IDX], localMaxPosFromLeft);
+        			atomicSet(maxs[MAX_POS_RIGHT_IDX], localMaxPosFromRight);
+        			atomicSet(out[MAX_VAL_IDX], maxValue);
+        			atomicSet(out[MAX_POS_LEFT_IDX], atomicGet(maxs[MAX_POS_LEFT_IDX])); // read back through the atomic slot
+        			atomicSet(out[MAX_POS_RIGHT_IDX], localMaxPosFromRight); // same value as just stored in maxs[MAX_POS_RIGHT_IDX]
+        		}
+        	}
+        }
+    }
+
+}