diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba6f4814e90d2cac4ad3addbae1a092a4b8e4efa..32898a137d0470ddc79807ceedc24b54927656a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@
 * Fixed NPE bug for Kernel.getProfileReportCurrentThread(device) and similar methods 
 * Fixed bug where ClassModel would throw an error when loaded if boot strap methods were 0.
 * Aparapi can now run on any OpenCL version rather than failing on untested versions it produces a warning.
+* Fixed Java Alternative algorithm not working for arbitrary NDRanges #142, #5
+* New Range method API to deal with the fact Ranges need to be bound to the Device and Kernel instances
+* Fixed Range computation of local size for 1D ranges, where the algorithm could exceed the max. kernel and device allowed work group size
+* Reworked Profiling support to deal with the possibility of Kernel compilation being decoupled from the kernel execution
 * Updated the following dependency versions:
 ** com.aparapi: aparapi-jni 1.4.2 -> 1.4.3
 ** org.apache.bcel:bcel 6.4.1 -< 6.5.0
diff --git a/src/main/java/com/aparapi/Kernel.java b/src/main/java/com/aparapi/Kernel.java
index 4b9686dbe07db2168cef1675122e85aacb97b20b..b01f5e57efae92741c5ef091a1598f6bf3a2f5ec 100644
--- a/src/main/java/com/aparapi/Kernel.java
+++ b/src/main/java/com/aparapi/Kernel.java
@@ -90,6 +90,7 @@ import java.util.logging.Logger;
 import com.aparapi.device.Device;
 import com.aparapi.device.JavaDevice;
 import com.aparapi.device.OpenCLDevice;
+import com.aparapi.exception.AparapiKernelFailedException;
 import com.aparapi.exception.CompileFailedException;
 import com.aparapi.internal.kernel.IKernelBarrier;
 import com.aparapi.internal.kernel.KernelArg;
@@ -342,6 +343,9 @@ public abstract class Kernel implements Cloneable {
       public abstract void run();
 
       public Kernel execute(Range _range) {
+         if (!_range.isSameKernel(Kernel.this)) {
+             throw new AparapiKernelFailedException("Cannot execute kernel with the specified Range. It is targetting a different Kernel instance");
+         }         
          return (Kernel.this.execute("foo", _range, 1));
       }
    }
@@ -2801,6 +2805,9 @@ public abstract class Kernel implements Cloneable {
     *
     */
    public synchronized Kernel execute(Range _range) {
+      if (!_range.isSameKernel(this)) {
+         throw new AparapiKernelFailedException("Cannot execute kernel with the specified Range. It is targetting a different Kernel instance");
+      }
       return (execute(_range, 1));
    }
 
@@ -2843,10 +2850,10 @@ public abstract class Kernel implements Cloneable {
    protected Range createRange(int _range) {
       if (executionMode.equals(EXECUTION_MODE.AUTO)) {
          Device device = getTargetDevice();
-         Range range = Range.create(device, _range);
+         Range range = Range.create(this, device, _range);
          return range;
       } else {
-         return Range.create(null, _range);
+         return Range.create(this, null, _range);
       }
    }
 
@@ -2861,6 +2868,9 @@ public abstract class Kernel implements Cloneable {
     *
     */
    public synchronized Kernel execute(Range _range, int _passes) {
+      if (!_range.isSameKernel(this)) {
+          throw new AparapiKernelFailedException("Cannot execute kernel with the specified Range. It is targetting a different Kernel instance");
+      }
       return (execute("run", _range, _passes));
    }
 
@@ -2904,6 +2914,9 @@ public abstract class Kernel implements Cloneable {
     *
     */
    public synchronized Kernel execute(String _entrypoint, Range _range, int _passes) {
+      if (!_range.isSameKernel(this)) {
+         throw new AparapiKernelFailedException("Cannot execute kernel with the specified Range. It is targetting a different Kernel instance");
+      }
       return prepareKernelRunner().execute(_entrypoint, _range, _passes);
    }
 
diff --git a/src/main/java/com/aparapi/ProfileReport.java b/src/main/java/com/aparapi/ProfileReport.java
index de7a8b80ff81801ba3e8262f96dbd63e1ddd484f..01e5e1b133406cf3e3dc0e2cd1328328b8e7e69d 100644
--- a/src/main/java/com/aparapi/ProfileReport.java
+++ b/src/main/java/com/aparapi/ProfileReport.java
@@ -119,11 +119,26 @@ public final class ProfileReport {
        if (stage == ProfilingEvent.START.ordinal()) {
           return 0;
        }
+       if (stage == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           //READY_TO_PREPARE_EXECUTE is a stage that never takes time; it is just a partial start time
+           //reference point.
+           return 0.0;
+       }
        return (currentTimes[stage] - currentTimes[stage - 1]) / MILLION;
     }
 
     /** Elapsed time for all events {@code from} through {@code to}.*/
     public double getElapsedTime(int from, int to) {
+       double accum = 0.0;
+       if (from < ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           if (to >= ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+               accum = (currentTimes[ProfilingEvent.OPENCL_COMPILED.ordinal()] - currentTimes[from]) / MILLION;
+               accum += (currentTimes[to] - currentTimes[ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()]) / MILLION;
+               return accum;
+           } else {
+               return (currentTimes[to] - currentTimes[from]) / MILLION; 
+           }
+       }
        return (currentTimes[to] - currentTimes[from]) / MILLION;
     }
     
diff --git a/src/main/java/com/aparapi/Range.java b/src/main/java/com/aparapi/Range.java
index 065612693995e428b30bdbfa9006a71c9b34d3ed..65781d5a52198cd9529d8bcafcfa32a0c09c27d5 100644
--- a/src/main/java/com/aparapi/Range.java
+++ b/src/main/java/com/aparapi/Range.java
@@ -16,8 +16,12 @@
 package com.aparapi;
 
 import com.aparapi.device.*;
+import com.aparapi.exception.AparapiRangeFailedException;
+import com.aparapi.exception.QueryFailedException;
 import com.aparapi.internal.jni.*;
+import com.aparapi.opencl.OpenCL;
 
+import java.lang.ref.WeakReference;
 import java.util.*;
 
 /**
@@ -64,53 +68,99 @@ import java.util.*;
  */
 public class Range extends RangeJNI{
 
+   //Maximum allowed number of threads per core for JTP
    public static final int THREADS_PER_CORE = 16;
 
-   public static final int MAX_OPENCL_GROUP_SIZE = 256;
+   //Default maximum work group size for any possible OpenCL device
+   public static final int MAX_OPENCL_GROUP_SIZE = 1024;
 
+   //This is the largest possible MAX_WORK_GROUP size that Aparapi will handle for any possible OpenCL or JTP device
    public static final int MAX_GROUP_SIZE = Math.max(Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE,
-         MAX_OPENCL_GROUP_SIZE);
+                                                       MAX_OPENCL_GROUP_SIZE);
 
    private OpenCLDevice device = null;
+   
+   //The kernel to which this Range instance pertains
+   private final WeakReference<Kernel> kernel;
 
+   //The actual allowed maximum work group size for a given kernel and device
    private int maxWorkGroupSize;
 
+   //The actual allowed maximum item size for a given device 
    private int[] maxWorkItemSize = new int[] {
          MAX_GROUP_SIZE,
          MAX_GROUP_SIZE,
          MAX_GROUP_SIZE
    };
-
+   
    /**
     * Minimal constructor
     * 
-    * @param _device
-    * @param _dims
+    * @param _kernel the kernel for which this Range is meant
+    * @param _device the device where the kernel is to be executed
+    * @param _dims the dimensions to use for the Range
     */
-   public Range(Device _device, int _dims) {
+   public Range(final Kernel _kernel, final Device _device, final int _dims) {
       device = !(_device instanceof OpenCLDevice) ? null : (OpenCLDevice) _device;
       dims = _dims;
+      
+      kernel = new WeakReference<Kernel>(_kernel);
 
       if (device != null) {
          maxWorkItemSize = device.getMaxWorkItemSize();
-         maxWorkGroupSize = device.getMaxWorkGroupSize();
+         if (kernel.get() == null) {
+             //FIXME OpenCL source code should also be able to retrieve the real max. work group size for its compiled source. 
+             //Use the Device hint for the MaxWorkGroupSize; this is only for OpenCL source code run directly by Aparapi,
+             //or for querying a device driver
+             maxWorkGroupSize = device.getMaxWorkGroupSize();
+         } else {
+             //This is the codepath for Aparapi Kernels that are to run on a real OpenCL device.
+             try {
+                maxWorkGroupSize = kernel.get().getKernelMaxWorkGroupSize(_device);
+            } catch (QueryFailedException e) {
+                throw new AparapiRangeFailedException("Couldn't retrieve device max. work group size", e);
+            }
+         }
       } else {
-         maxWorkGroupSize = MAX_GROUP_SIZE;
+         //There is no point in allowing a workGroupSize as large as the OpenCL device max. work group size, because
+         //it will just overload the CPU when going above the number of real cores; besides, Java already counts
+         //hyper-threading as an extra real core, which will already overload the machine.
+         maxWorkGroupSize = Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE;
       }
    }
+   
+   /**
+    * Create a one dimensional range <code>0.._globalWidth</code>
+    * <br>
+    * Note that for this range to be valid : <br>
+    * <strong><code> _globalWidth > 0 && _globalWidth <= getMaxWorkItemSize() </code> </strong> 
+    * 
+    * @param _cl the Aparapi OpenCL object for native OpenCL source code
+    * @param _device the intended device where the kernel should run
+    * @param _globalWidth the overall range we wish to process
+    * @return A new Range with the requested dimensions for the specified OpenCL instance and Device instance
+    */
+   public static Range create(final OpenCL<?> _cl, final Device _device, int _globalWidth) {
+      final Range r = Range.create((Kernel)null, _device, _globalWidth);
+       
+      return r;
+   }
 
    /** 
     * Create a one dimensional range <code>0.._globalWidth</code> which is processed in groups of size _localWidth.
-    * <br/>
-    * Note that for this range to be valid : </br> <strong><code>_globalWidth > 0 && _localWidth > 0 && _localWidth < MAX_GROUP_SIZE && _globalWidth % _localWidth==0</code></strong>
+    * <br>
+    * Note that for this range to be valid : <br> 
+    * <strong><code>_globalWidth > 0 && _localWidth > 0 && _localWidth < MAX_GROUP_SIZE && _globalWidth % _localWidth==0</code></strong>
     * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run
     * @param _globalWidth the overall range we wish to process
     * @param _localWidth the size of the group we wish to process.
-    * @return A new Range with the requested dimensions
+    * @return A new Range with the requested dimensions for the specified Kernel instance and Device instance
     */
-   public static Range create(Device _device, int _globalWidth, int _localWidth) {
-      final Range range = new Range(_device, 1);
-
+   public static Range create(final Kernel _kernel, final Device _device, int _globalWidth, int _localWidth) {
+      final Range range = new Range(_kernel, _device, 1);
+      
       range.setGlobalSize_0(_globalWidth);
       range.setLocalSize_0(_localWidth);
 
@@ -126,8 +176,7 @@ public class Range extends RangeJNI{
     * @param _max an upper bound on the value that can be chosen
     * @return and array of factors of _value
     */
-
-   private static int[] getFactors(int _value, int _max) {
+   private static int[] getFactors(final int _value, final int _max) {
       final int factors[] = new int[MAX_GROUP_SIZE];
       int factorIdx = 0;
 
@@ -142,21 +191,24 @@ public class Range extends RangeJNI{
 
    /** 
     * Create a one dimensional range <code>0.._globalWidth</code> with an undefined group size.
-    * <br/>
+    * <br>
     * Note that for this range to be valid :- </br> <strong><code>_globalWidth > 0 </code></strong>
-    * <br/>
+    * <br>
     * The groupsize will be chosen such that _localWidth > 0 && _localWidth < MAX_GROUP_SIZE && _globalWidth % _localWidth==0 is true
     * 
     * We extract the factors of _globalWidth and choose the highest value.
     * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run 
     * @param _globalWidth the overall range we wish to process
-    * @return A new Range with the requested dimensions
+    * @return A new Range with the requested dimensions for the specified Kernel instance and Device instance
     */
-   public static Range create(Device _device, int _globalWidth) {
-      final Range withoutLocal = create(_device, _globalWidth, 1);
+   public static Range create(final Kernel _kernel, final Device _device, int _globalWidth) {
+      final Range withoutLocal = create(_kernel, _device, _globalWidth, 1);
 
       if (_device == JavaDevice.THREAD_POOL) {
-         withoutLocal.setLocalSize_0(Runtime.getRuntime().availableProcessors());
+         createThreadPoolHelper(_globalWidth, withoutLocal);
+         
          withoutLocal.setLocalIsDerived(true);
          return withoutLocal;
       } else if (_device instanceof JavaDevice) {
@@ -168,13 +220,19 @@ public class Range extends RangeJNI{
          withoutLocal.setLocalIsDerived(true);
          return withoutLocal;
       }
-
+           
       if (withoutLocal.isValid()) {
          withoutLocal.setLocalIsDerived(true);
-         final int[] factors = getFactors(withoutLocal.getGlobalSize_0(), withoutLocal.getMaxWorkItemSize()[0]);
-
-         withoutLocal.setLocalSize_0(factors[factors.length - 1]);
-
+         final int[] factors = getFactors(withoutLocal.getGlobalSize_0(), withoutLocal.getMaxWorkGroupSize());
+
+         //Avoid a factor that is greater than the maximum allowed work group size for the kernel.
+         //int index = 0;
+         //for (index = factors.length - 1; index > 0 && factors[index] > withoutLocal.getMaxWorkGroupSize(); index--);
+         
+         int localSize = factors[factors.length-1];
+         
+         withoutLocal.setLocalSize_0(localSize);
+         
          withoutLocal.setValid((withoutLocal.getLocalSize_0() > 0)
                && (withoutLocal.getLocalSize_0() <= withoutLocal.getMaxWorkItemSize()[0])
                && (withoutLocal.getLocalSize_0() <= withoutLocal.getMaxWorkGroupSize())
@@ -184,28 +242,96 @@ public class Range extends RangeJNI{
       return (withoutLocal);
    }
 
-   public static Range create(int _globalWidth, int _localWidth) {
-      final Range range = create(null, _globalWidth, _localWidth);
+   /**
+    * Create helper for determining a suitable local size for the JTP execution mode.
+    * @param _globalWidth the user specified global width
+    * @param withoutLocal the Range instance
+    */
+   private static void createThreadPoolHelper(int _globalWidth, final Range withoutLocal) {
+      int availableProcessors = Runtime.getRuntime().availableProcessors();
+      if (availableProcessors > _globalWidth) {
+         withoutLocal.setLocalSize_0(_globalWidth);
+      } else if (_globalWidth % availableProcessors == 0) {
+         withoutLocal.setLocalSize_0(availableProcessors);
+      } else if (_globalWidth % (availableProcessors / 2) == 0) {
+         withoutLocal.setLocalSize_0(availableProcessors / 2);
+      } else {
+         withoutLocal.setLocalSize_0(1);
+      }
+   }
+
+   /** 
+    * Create a one dimensional range <code>0.._globalWidth</code> which is processed in groups of size _localWidth.
+    * <br>
+    * Note that for this range to be valid : <br> 
+    * <strong><code>_globalWidth > 0 && _localWidth > 0 && _localWidth < MAX_GROUP_SIZE && _globalWidth % _localWidth==0</code></strong>
+    *  
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _globalWidth the overall range we wish to process
+    * @param _localWidth the size of the local group we wish to process
+    * @return A new Range with the requested dimensions for the specified Kernel instance and assigned Device instance
+    */
+   public static Range create(final Kernel _kernel, int _globalWidth, int _localWidth) {
+      final Range range = create(_kernel, null, _globalWidth, _localWidth);
 
       return (range);
    }
-
-   public static Range create(int _globalWidth) {
-      final Range range = create(null, _globalWidth);
+   
+   /** 
+    * Create a one dimensional range <code>0.._globalWidth</code> with an undefined group size.
+    * <br>
+    * Note that for this range to be valid :- <br> <strong><code>_globalWidth > 0 </code></strong>
+    * <br>
+    * The groupsize will be chosen such that _localWidth > 0 && _localWidth < MAX_GROUP_SIZE && _globalWidth % _localWidth==0 is true
+    * 
+    * We extract the factors of _globalWidth and choose the highest value.
+    * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _globalWidth the overall range we wish to process
+    * @return A new Range with the requested dimensions for the specified Kernel instance and assigned Device instance
+    */
+   public static Range create(final Kernel _kernel, int _globalWidth) {
+      final Range range = create(_kernel, null, _globalWidth);
 
       return (range);
    }
 
-   /** 
+   /**
     * Create a two dimensional range 0.._globalWidth x 0.._globalHeight using a group which is _localWidth x _localHeight in size.
-    * <br/>
+    * <br>
     * Note that for this range to be valid  _globalWidth > 0 &&  _globalHeight >0 && _localWidth>0 && _localHeight>0 && _localWidth*_localHeight < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0.
     * 
-    *  @param _globalWidth the overall range we wish to process
-    * @return
-    */
-   public static Range create2D(Device _device, int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
-      final Range range = new Range(_device, 2);
+    * @param _cl the Aparapi OpenCL object for native OpenCL source code
+    * @param _device the intended device where the kernel should run
+    * @param _globalWidth the width range we wish to process
+    * @param _globalHeight the height range we wish to process
+    * @param _localWidth the local group width
+    * @param _localHeight the local group height
+    * @return the Range instance for the intended OpenCL instance and Device instance
+    */
+   public static Range create2D(final OpenCL<?> _cl, final Device _device, 
+            int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
+       final Range r = Range.create2D((Kernel)null, _device, _globalWidth, _globalHeight, _localWidth, _localHeight);
+       
+       return r;
+   }
+   
+   /** 
+    * Create a two dimensional range 0.._globalWidth x 0.._globalHeight using a group which is _localWidth x _localHeight in size.
+    * <br>
+    * Note that for this range to be valid  _globalWidth > 0 &&  _globalHeight >0 && _localWidth>0 && _localHeight>0 && _localWidth*_localHeight < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0.
+    *
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run  
+    * @param _globalWidth the width range we wish to process
+    * @param _globalHeight the height range we wish to process
+    * @param _localWidth the local group width
+    * @param _localHeight the local group height
+    * @return the Range instance for the intended Kernel instance and Device instance
+    */
+   public static Range create2D(final Kernel _kernel, final Device _device, 
+           int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
+      final Range range = new Range(_kernel, _device, 2);
 
       range.setGlobalSize_0(_globalWidth);
       range.setLocalSize_0(_localWidth);
@@ -222,6 +348,12 @@ public class Range extends RangeJNI{
       return (range);
    }
 
+   public static Range create2D(final OpenCL<?> _cl, final Device _device, int _globalWidth, int _globalHeight) {
+       final Range r = Range.create2D((Kernel)null, _device, _globalWidth, _globalHeight);
+       
+       return r;
+   }
+   
    /** 
     * Create a two dimensional range <code>0.._globalWidth * 0.._globalHeight</code> choosing suitable values for <code>localWidth</code> and <code>localHeight</code>.
     * <p>
@@ -234,44 +366,18 @@ public class Range extends RangeJNI{
     * <p>
     * For example for <code>MAX_GROUP_SIZE</code> of 16 we favor 4x4 over 1x16.
     * 
-    * @param _globalWidth the overall range we wish to process
-    * @return
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run  
+    * @param _globalWidth the width range we wish to process
+    * @param _globalHeight the height range we wish to process
+    * @return the Range instance for the intended Kernel instance and Device instance
     */
-   public static Range create2D(Device _device, int _globalWidth, int _globalHeight) {
-      final Range withoutLocal = create2D(_device, _globalWidth, _globalHeight, 1, 1);
+   public static Range create2D(final Kernel _kernel, final Device _device, int _globalWidth, int _globalHeight) {
+      final Range withoutLocal = create2D(_kernel, _device, _globalWidth, _globalHeight, 1, 1);
 
       if (withoutLocal.isValid()) {
          withoutLocal.setLocalIsDerived(true);
-         final int[] widthFactors = getFactors(_globalWidth, withoutLocal.getMaxWorkItemSize()[0]);
-         final int[] heightFactors = getFactors(_globalHeight, withoutLocal.getMaxWorkItemSize()[1]);
-
-         withoutLocal.setLocalSize_0(1);
-         withoutLocal.setLocalSize_1(1);
-         int max = 1;
-         int perimeter = 0;
-
-         for (final int w : widthFactors) {
-            for (final int h : heightFactors) {
-               final int size = w * h;
-               if (size > withoutLocal.getMaxWorkGroupSize()) {
-                  break;
-               }
-
-               if (size > max) {
-                  max = size;
-                  perimeter = w + h;
-                  withoutLocal.setLocalSize_0(w);
-                  withoutLocal.setLocalSize_1(h);
-               } else if (size == max) {
-                  final int localPerimeter = w + h;
-                  if (localPerimeter < perimeter) {// is this the shortest perimeter so far
-                     perimeter = localPerimeter;
-                     withoutLocal.setLocalSize_0(w);
-                     withoutLocal.setLocalSize_1(h);
-                  }
-               }
-            }
-         }
+         create2DHelper(_globalWidth, _globalHeight, withoutLocal);
 
          withoutLocal.setValid((withoutLocal.getLocalSize_0() > 0) && (withoutLocal.getLocalSize_1() > 0)
                && (withoutLocal.getLocalSize_0() <= withoutLocal.getMaxWorkItemSize()[0])
@@ -284,35 +390,134 @@ public class Range extends RangeJNI{
       return (withoutLocal);
    }
 
-   public static Range create2D(int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
-      final Range range = create2D(null, _globalWidth, _globalHeight, _localWidth, _localHeight);
+/**
+ * Helper method for create2D to adjust the 2D local size from the MaxWorkGroupSize and GlobalSizes 
+ * @param _globalWidth the user specified globalWidth
+ * @param _globalHeight the user specified globalHeight
+ * @param withoutLocal the Range instance
+ */
+private static Range create2DHelper(int _globalWidth, int _globalHeight, final Range withoutLocal) {
+    final int[] widthFactors = getFactors(_globalWidth, withoutLocal.getMaxWorkItemSize()[0]);
+     final int[] heightFactors = getFactors(_globalHeight, withoutLocal.getMaxWorkItemSize()[1]);
+
+     withoutLocal.setLocalSize_0(1);
+     withoutLocal.setLocalSize_1(1);
+     int max = 1;
+     int perimeter = 0;
+
+     for (final int w : widthFactors) {
+        for (final int h : heightFactors) {
+           final int size = w * h;
+           if (size > withoutLocal.getMaxWorkGroupSize()) {
+              break;
+           }
+
+           if (size > max) {
+              max = size;
+              perimeter = w + h;
+              withoutLocal.setLocalSize_0(w);
+              withoutLocal.setLocalSize_1(h);
+           } else if (size == max) {
+              final int localPerimeter = w + h;
+              if (localPerimeter < perimeter) {// is this the shortest perimeter so far
+                 perimeter = localPerimeter;
+                 withoutLocal.setLocalSize_0(w);
+                 withoutLocal.setLocalSize_1(h);
+              }
+           }
+        }
+     }
+     
+     return withoutLocal;
+}
+ 
+   /** 
+    * Create a two dimensional range <code>0.._globalWidth * 0.._globalHeight</code> choosing suitable values for <code>localWidth</code> and <code>localHeight</code>.
+    * <p>
+    * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 && _localWidth>0 && _localHeight>0 && _localWidth*_localHeight < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0</code>.
+    * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _globalWidth the width range we wish to process
+    * @param _globalHeight the height range we wish to process
+    * @param _localWidth the local group width
+    * @param _localHeight the local group height
+    * @return the Range instance for the intended Kernel instance and assigned Device instance
+    */
+   public static Range create2D(final Kernel _kernel, int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
+      final Range range = create2D(_kernel, null, _globalWidth, _globalHeight, _localWidth, _localHeight);
 
       return (range);
    }
 
-   public static Range create2D(int _globalWidth, int _globalHeight) {
-      final Range range = create2D(null, _globalWidth, _globalHeight);
+   /** 
+    * Create a two dimensional range <code>0.._globalWidth * 0.._globalHeight</code> choosing suitable values for <code>localWidth</code> and <code>localHeight</code>.
+    * <p>
+    * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 && _localWidth>0 && _localHeight>0 && _localWidth*_localHeight < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0</code>.
+    * 
+    * <p>
+    * To determine suitable values for <code>_localWidth</code> and <code>_localHeight</code> we extract the factors for <code>_globalWidth</code> and <code>_globalHeight</code> and then 
+    * find the largest product ( <code><= MAX_GROUP_SIZE</code>) with the lowest perimeter.
+    * 
+    * <p>
+    * For example for <code>MAX_GROUP_SIZE</code> of 16 we favor 4x4 over 1x16.
+    * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _globalWidth the width range we wish to process
+    * @param _globalHeight the height range we wish to process
+    * @return the Range instance for the intended Kernel instance and assigned Device instance
+    */
+   public static Range create2D(final Kernel _kernel, int _globalWidth, int _globalHeight) {
+      final Range range = create2D(_kernel, null, _globalWidth, _globalHeight);
 
       return (range);
    }
 
+   
+   /** 
+    * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
+    * in groups defined by  <code>localWidth</code> * <code>localHeight</code> * <code>localDepth</code>.
+    * <p>
+    * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 && _globalDepth >0 && _localWidth>0 && _localHeight>0 && _localDepth>0 && _localWidth*_localHeight*_localDepth < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0 && _globalDepth%_localDepth==0</code>.
+    * 
+    * @param _cl the Aparapi OpenCL object for native OpenCL source code
+    * @param _device the intended device where the kernel should run  
+    * @param _globalWidth the width of the 3D grid we wish to process
+    * @param _globalHeight the height of the 3D grid we wish to process
+    * @param _globalDepth the depth of the 3D grid we wish to process
+    * @param _localWidth the width of the 3D group we wish to process
+    * @param _localHeight the height of the 3D group we wish to process
+    * @param _localDepth the depth of the 3D group we wish to process
+    * @return the Range instance for the intended OpenCL instance and Device instance
+    */
+   public static Range create3D(final OpenCL<?> _cl, final Device _device, 
+           int _globalWidth, int _globalHeight, int _globalDepth, 
+           int _localWidth,  int _localHeight, int _localDepth) {
+       final Range r = Range.create3D((Kernel)null, _device, _globalWidth, _globalHeight, _globalDepth, _localWidth,
+                                 _localHeight, _localDepth);
+       
+       return r;
+   }
+   
    /** 
-    * Create a two dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
+    * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
     * in groups defined by  <code>localWidth</code> * <code>localHeight</code> * <code>localDepth</code>.
     * <p>
     * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 _globalDepth >0 && _localWidth>0 && _localHeight>0 && _localDepth>0 && _localWidth*_localHeight*_localDepth < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0 && _globalDepth%_localDepth==0</code>.
     * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run  
     * @param _globalWidth the width of the 3D grid we wish to process
     * @param _globalHeight the height of the 3D grid we wish to process
     * @param _globalDepth the depth of the 3D grid we wish to process
     * @param _localWidth the width of the 3D group we wish to process
     * @param _localHeight the height of the 3D group we wish to process
     * @param _localDepth the depth of the 3D group we wish to process
-    * @return
+    * @return the Range instance for the intended Kernel instance and Device instance
     */
-   public static Range create3D(Device _device, int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth,
-         int _localHeight, int _localDepth) {
-      final Range range = new Range(_device, 3);
+   public static Range create3D(final Kernel _kernel, final Device _device, 
+         int _globalWidth, int _globalHeight, int _globalDepth, 
+         int _localWidth,  int _localHeight, int _localDepth) {
+      final Range range = new Range(_kernel, _device, 3);
 
       range.setGlobalSize_0(_globalWidth);
       range.setLocalSize_0(_localWidth);
@@ -332,6 +537,25 @@ public class Range extends RangeJNI{
       return (range);
    }
 
+   /**
+    * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
+    * <p>
+    * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 && _globalDepth >0</code> and must
+    * not exceed max. work item sizes for each dimension.
+    * 
+    * @param _cl the Aparapi OpenCL object for native OpenCL source code
+    * @param _device the intended device where the kernel should run  
+    * @param _globalWidth the width of the 3D grid we wish to process
+    * @param _globalHeight the height of the 3D grid we wish to process
+    * @param _globalDepth the depth of the 3D grid we wish to process
+    * @return the Range instance for the intended OpenCL instance and Device instance
+    */
+   public static Range create3D(final OpenCL<?> _cl, final Device _device, int _globalWidth, int _globalHeight, int _globalDepth) {
+       Range r = Range.create3D((Kernel)null, _device, _globalWidth, _globalHeight, _globalDepth, 1, 1, 1);
+       
+       return r;
+   }
+   
    /** 
     * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
     * choosing suitable values for <code>localWidth</code>, <code>localHeight</code> and <code>localDepth</code>.
@@ -345,54 +569,20 @@ public class Range extends RangeJNI{
     * <p>
     * For example for <code>MAX_GROUP_SIZE</code> of 64 we favor 4x4x4 over 1x16x16.
     * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run  
     * @param _globalWidth the width of the 3D grid we wish to process
     * @param _globalHeight the height of the 3D grid we wish to process
     * @param _globalDepth the depth of the 3D grid we wish to process
-    * @return
+    * @return the Range instance for the intended Kernel instance and Device instance
     */
-   public static Range create3D(Device _device, int _globalWidth, int _globalHeight, int _globalDepth) {
-      final Range withoutLocal = create3D(_device, _globalWidth, _globalHeight, _globalDepth, 1, 1, 1);
+   public static Range create3D(final Kernel _kernel, final Device _device, int _globalWidth, int _globalHeight, int _globalDepth) {
+      final Range withoutLocal = create3D(_kernel, _device, _globalWidth, _globalHeight, _globalDepth, 1, 1, 1);
 
       if (withoutLocal.isValid()) {
          withoutLocal.setLocalIsDerived(true);
 
-         final int[] widthFactors = getFactors(_globalWidth, withoutLocal.getMaxWorkItemSize()[0]);
-         final int[] heightFactors = getFactors(_globalHeight, withoutLocal.getMaxWorkItemSize()[1]);
-         final int[] depthFactors = getFactors(_globalDepth, withoutLocal.getMaxWorkItemSize()[2]);
-
-         withoutLocal.setLocalSize_0(1);
-         withoutLocal.setLocalSize_1(1);
-         withoutLocal.setLocalSize_2(1);
-
-         int max = 1;
-         int perimeter = 0;
-
-         for (final int w : widthFactors) {
-            for (final int h : heightFactors) {
-               for (final int d : depthFactors) {
-                  final int size = w * h * d;
-                  if (size > withoutLocal.getMaxWorkGroupSize()) {
-                     break;
-                  }
-
-                  if (size > max) {
-                     max = size;
-                     perimeter = w + h + d;
-                     withoutLocal.setLocalSize_0(w);
-                     withoutLocal.setLocalSize_1(h);
-                     withoutLocal.setLocalSize_2(d);
-                  } else if (size == max) {
-                     final int localPerimeter = w + h + d;
-                     if (localPerimeter < perimeter) { // is this the shortest perimeter so far
-                        perimeter = localPerimeter;
-                        withoutLocal.setLocalSize_0(w);
-                        withoutLocal.setLocalSize_1(w);
-                        withoutLocal.setLocalSize_2(d);
-                     }
-                  }
-               }
-            }
-         }
+         create3DHelper(_globalWidth, _globalHeight, _globalDepth, withoutLocal);
 
          withoutLocal.setValid((withoutLocal.getLocalSize_0() > 0)
                && (withoutLocal.getLocalSize_1() > 0)
@@ -409,15 +599,99 @@ public class Range extends RangeJNI{
       return (withoutLocal);
    }
 
-   public static Range create3D(int _globalWidth, int _globalHeight, int _globalDepth) {
-      final Range range = create3D(null, _globalWidth, _globalHeight, _globalDepth);
+/**
+ * 3D range helper: sets the local sizes to the factor triple (w, h, d) with the largest product not above getMaxWorkGroupSize(), ties broken by the smallest w + h + d.
+ * 
+ * @param _globalWidth the global width specified by the user
+ * @param _globalHeight the global height specified by the user
+ * @param _globalDepth the global depth specified by the user
+ * @param withoutLocal the Range instance whose local sizes are updated in place
+ */
+   private static void create3DHelper(int _globalWidth, int _globalHeight, int _globalDepth, final Range withoutLocal) {
+     final int[] widthFactors = getFactors(_globalWidth, withoutLocal.getMaxWorkItemSize()[0]);
+     final int[] heightFactors = getFactors(_globalHeight, withoutLocal.getMaxWorkItemSize()[1]);
+     final int[] depthFactors = getFactors(_globalDepth, withoutLocal.getMaxWorkItemSize()[2]);
+
+     withoutLocal.setLocalSize_0(1); // fall back to 1x1x1 when no larger combination fits
+     withoutLocal.setLocalSize_1(1);
+     withoutLocal.setLocalSize_2(1);
+
+     int max = 1;
+     int perimeter = 0;
+
+     for (final int w : widthFactors) {
+        for (final int h : heightFactors) {
+           for (final int d : depthFactors) {
+              final int size = w * h * d;
+              if (size > withoutLocal.getMaxWorkGroupSize()) {
+                 break; // NOTE(review): leaves only the depth loop; presumably getFactors returns ascending factors - confirm
+              }
+
+              if (size > max) {
+                 max = size;
+                 perimeter = w + h + d;
+                 withoutLocal.setLocalSize_0(w);
+                 withoutLocal.setLocalSize_1(h);
+                 withoutLocal.setLocalSize_2(d);
+              } else if (size == max) {
+                 final int localPerimeter = w + h + d;
+                 if (localPerimeter < perimeter) { // is this the shortest perimeter so far
+                    perimeter = localPerimeter;
+                    withoutLocal.setLocalSize_0(w);
+                    withoutLocal.setLocalSize_1(h); // fix: was w, which put the width factor into the height dimension
+                    withoutLocal.setLocalSize_2(d);
+                 }
+              }
+           }
+        }
+     }
+   }
+
+   /** 
+    * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
+    * <p>
+    * Note that for this range to be valid <code>_globalWidth > 0 &&  _globalHeight > 0 && _globalDepth > 0</code> and must
+    * not exceed max. work item sizes for each dimension.
+    * 
+    * <p>
+    * To determine suitable values for <code>_localWidth</code>,<code>_localHeight</code> and <code>_localDepth</code> we extract the factors for <code>_globalWidth</code>,<code>_globalHeight</code> and <code>_globalDepth</code> and then 
+    * find the largest product ( <code><= MAX_GROUP_SIZE</code>) with the lowest perimeter.
+    * 
+    * <p>
+    * For example for <code>MAX_GROUP_SIZE</code> of 64 we favor 4x4x4 over 1x16x16.
+    * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _device the intended device where the kernel should run  
+    * @param _globalWidth the width of the 3D grid we wish to process
+    * @param _globalHeight the height of the 3D grid we wish to process
+    * @param _globalDepth the depth of the 3D grid we wish to process
+    * @return the Range instance for the intended Kernel instance and assigned Device instance
+    */
+   public static Range create3D(final Kernel _kernel, int _globalWidth, int _globalHeight, int _globalDepth) {
+      final Range range = create3D(_kernel, null, _globalWidth, _globalHeight, _globalDepth);
 
       return (range);
    }
 
-   public static Range create3D(int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth, int _localHeight,
-         int _localDepth) {
-      final Range range = create3D(null, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth);
+   /** 
+    * Create a three dimensional range <code>0.._globalWidth * 0.._globalHeight *0../_globalDepth</code> 
+    * in groups defined by  <code>localWidth</code> * <code>localHeight</code> * <code>localDepth</code>.
+    * <p>
+    * Note that for this range to be valid  <code>_globalWidth > 0 &&  _globalHeight >0 _globalDepth >0 && _localWidth>0 && _localHeight>0 && _localDepth>0 && _localWidth*_localHeight*_localDepth < MAX_GROUP_SIZE && _globalWidth%_localWidth==0 && _globalHeight%_localHeight==0 && _globalDepth%_localDepth==0</code>.
+    * 
+    * @param _kernel the Aparapi kernel for which this Range is being created
+    * @param _globalWidth the width of the 3D grid we wish to process
+    * @param _globalHeight the height of the 3D grid we wish to process
+    * @param _globalDepth the depth of the 3D grid we wish to process
+    * @param _localWidth the width of the 3D group we wish to process
+    * @param _localHeight the height of the 3D group we wish to process
+    * @param _localDepth the depth of the 3D group we wish to process
+    * @return the Range instance for the intended Kernel instance and assigned Device instance
+    */
+   public static Range create3D(final Kernel _kernel, 
+           int _globalWidth, int _globalHeight, int _globalDepth, 
+           int _localWidth, int _localHeight, int _localDepth) {
+      final Range range = create3D(_kernel, null, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth);
       return (range);
    }
 
@@ -654,4 +928,8 @@ public class Range extends RangeJNI{
    public void setMaxWorkItemSize(int[] maxWorkItemSize) {
       this.maxWorkItemSize = maxWorkItemSize;
    }
+   
+   public boolean isSameKernel(Kernel _kernel) {
+       return kernel.get() == _kernel;
+   }
 }
diff --git a/src/main/java/com/aparapi/device/Device.java b/src/main/java/com/aparapi/device/Device.java
index 2e43c43c718958bf7359ad36fde02dbc3f7a358b..b24128fe33f0bc42dc6a6d52eb6b8003a8854ea9 100644
--- a/src/main/java/com/aparapi/device/Device.java
+++ b/src/main/java/com/aparapi/device/Device.java
@@ -134,29 +134,29 @@ public abstract class Device implements Comparable<Device> {
       this.maxWorkItemSize = maxWorkItemSize;
    }
 
-   public Range createRange(int _globalWidth) {
-      return (Range.create(this, _globalWidth));
+   public Range createRange(Kernel kernel, int _globalWidth) {
+      return (Range.create(kernel, this, _globalWidth));
    }
 
-   public Range createRange(int _globalWidth, int _localWidth) {
-      return (Range.create(this, _globalWidth, _localWidth));
+   public Range createRange(Kernel kernel, int _globalWidth, int _localWidth) {
+      return (Range.create(kernel, this, _globalWidth, _localWidth));
    }
 
-   public Range createRange2D(int _globalWidth, int _globalHeight) {
-      return (Range.create2D(this, _globalWidth, _globalHeight));
+   public Range createRange2D(Kernel kernel, int _globalWidth, int _globalHeight) {
+      return (Range.create2D(kernel, this, _globalWidth, _globalHeight));
    }
 
-   public Range createRange2D(int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
-      return (Range.create2D(this, _globalWidth, _globalHeight, _localWidth, _localHeight));
+   public Range createRange2D(Kernel kernel, int _globalWidth, int _globalHeight, int _localWidth, int _localHeight) {
+      return (Range.create2D(kernel, this, _globalWidth, _globalHeight, _localWidth, _localHeight));
    }
 
-   public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth) {
-      return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth));
+   public Range createRange3D(Kernel kernel, int _globalWidth, int _globalHeight, int _globalDepth) {
+      return (Range.create3D(kernel, this, _globalWidth, _globalHeight, _globalDepth));
    }
 
-   public Range createRange3D(int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth, int _localHeight,
+   public Range createRange3D(Kernel kernel, int _globalWidth, int _globalHeight, int _globalDepth, int _localWidth, int _localHeight,
          int _localDepth) {
-      return (Range.create3D(this, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth));
+      return (Range.create3D(kernel, this, _globalWidth, _globalHeight, _globalDepth, _localWidth, _localHeight, _localDepth));
    }
 
    public abstract long getDeviceId();
diff --git a/src/main/java/com/aparapi/exception/AparapiRangeFailedException.java b/src/main/java/com/aparapi/exception/AparapiRangeFailedException.java
new file mode 100644
index 0000000000000000000000000000000000000000..5100b8a5a947cef167ec20f8ea839ff306b776dc
--- /dev/null
+++ b/src/main/java/com/aparapi/exception/AparapiRangeFailedException.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright (c) 2016 - 2018 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.exception;
+
+/**
+ * Unchecked exception (extends RuntimeException) thrown when a Range creation fails.
+ *  
+ * @author CoreRasurae
+ */
+public class AparapiRangeFailedException extends RuntimeException {
+
+	/**
+     * Serialization version identifier for this exception class.
+     */
+    private static final long serialVersionUID = 5825738909363220032L;
+
+	public AparapiRangeFailedException(String message) {
+		super(message);
+	}
+
+	public AparapiRangeFailedException(String message, Throwable cause) {
+		super(message, cause);
+	}
+
+	public AparapiRangeFailedException(String message, Throwable cause, boolean enableSuppression,
+			boolean writableStackTrace) {
+		super(message, cause, enableSuppression, writableStackTrace);
+	}
+}
diff --git a/src/main/java/com/aparapi/internal/kernel/KernelDeviceProfile.java b/src/main/java/com/aparapi/internal/kernel/KernelDeviceProfile.java
index d5ba22767b19b8717e84d88a19d7a5a49921bcc9..b9d4f8a97dfc3de23f5d88487c375c1986a66dc9 100644
--- a/src/main/java/com/aparapi/internal/kernel/KernelDeviceProfile.java
+++ b/src/main/java/com/aparapi/internal/kernel/KernelDeviceProfile.java
@@ -72,6 +72,9 @@ public class KernelDeviceProfile {
 		   lock.readLock().lock();
 		   try {
 			   for (int i = 1; i < currentTimes.length; ++i) {
+			       if (i == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+			           continue;
+			       }
 				   long elapsed = currentTimes[i] - currentTimes[i - 1];
 	
 				   accumulatedTimes.addAndGet(i, elapsed);
@@ -87,7 +90,11 @@ public class KernelDeviceProfile {
 		  lock.writeLock().lock();
 		  try {
 			  for (int i = 0; i < NUM_EVENTS; i++) {
-				  accumulatedTimesHolder[i] = accumulatedTimes.get(i);
+                  if (i == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+                      accumulatedTimesHolder[i] = 0;
+                  } else {
+                      accumulatedTimesHolder[i] = accumulatedTimes.get(i);
+                  }
 			  }	  
 		  } finally {
 			  lock.writeLock().unlock();
@@ -102,6 +109,7 @@ public class KernelDeviceProfile {
 	   private final ProfileReport report;
 	   private final WeakReference<ProfileReport> reportRef; 
 	   private ProfilingEvent lastEvent = null;
+	   private boolean wasLastEventSetFromCompileOnly = false;
 	   private int invocationCount = 0;	   
 
 	   private Accumulator(long _threadId) {
@@ -110,16 +118,34 @@ public class KernelDeviceProfile {
 		   reportRef = new WeakReference<>(report);
 	   }
 	   
-	   private void parseStartEventHelper(final ProfilingEvent event) {
+	   private void parseStartEventHelper(final ProfilingEvent event, boolean compileOnly) {
+	      ProfilingEvent updateValueForLastEvent = event; 
 	      if (event == ProfilingEvent.START) {		 
-	          if (lastEvent != null) {
+	          if (lastEvent != null && !wasLastEventSetFromCompileOnly) {
 	             logger.log(Level.SEVERE, "ProfilingEvent.START encountered without ProfilingEvent.EXECUTED");
 	          } else if (lastEvent == ProfilingEvent.START) {
 	             logger.log(Level.SEVERE, "Duplicate event ProfilingEvent.START");
 	          }
-	          Arrays.fill(currentTimes, 0L);
-	          ++invocationCount;
-	          invocationCountGlobal.incrementAndGet();
+	          
+	          if (!wasLastEventSetFromCompileOnly) {
+	              Arrays.fill(currentTimes, 0L);	              
+	              ++invocationCount;
+	              invocationCountGlobal.incrementAndGet();	              
+	          } else {
+	              //Code reaches here during the START event of a real execute, since compilation can only occur once for a Device,
+	              //and we know that such compilation occurred in the last event. Actually there was also a EXECUTED event which is
+	              //always generated, but can be safely ignored.
+	              //
+	              //So, do not trigger a report count increment and do not reset the timestamps, because this is not a real run.
+                  //We just want to keep the logged compilation time, and record the execution time in the same report.
+	              wasLastEventSetFromCompileOnly = compileOnly;
+	              //So, we resume as if the last event was OPENCL_COMPILED 
+	              lastEvent = ProfilingEvent.OPENCL_COMPILED;
+	              return;
+	          }
+	          
+	          wasLastEventSetFromCompileOnly = compileOnly;
+	          currentTimes[event.ordinal()] = System.nanoTime();
 	       } else {
 	          if (lastEvent == null) {
 	             if (event != ProfilingEvent.EXECUTED) {
@@ -130,28 +156,42 @@ public class KernelDeviceProfile {
 	                currentTimes[i] = currentTimes[i - 1];
 	             }
 	          }
+	          currentTimes[event.ordinal()] = System.nanoTime();
 	       }
-	       currentTimes[event.ordinal()] = System.nanoTime();
-	       if (event == ProfilingEvent.EXECUTED) {
-	          for (int i = 1; i < currentTimes.length; ++i) {
-	             long elapsed = currentTimes[i] - currentTimes[i - 1];
-	             if (elapsed < 0) {
-	                logger.log(Level.SEVERE, "negative elapsed time for event " + event);
-	                break;
-	             }
-	             accumulatedTimes[i] += elapsed;
-	          }
-	          
+	       if (event == ProfilingEvent.OPENCL_COMPILED) {
+               //Accumulated times are divided in two blocks, one until OpenCL kernel compile
+               for (int i = 1; i < ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal(); ++i) {
+                 long elapsed = currentTimes[i] - currentTimes[i - 1];
+                 if (elapsed < 0) {
+                    logger.log(Level.SEVERE, "negative elapsed time for event " + event);
+                    break;
+                 }
+                 accumulatedTimes[i] += elapsed;
+              }
+	       }
+	      
+	       if (event == ProfilingEvent.EXECUTED && !compileOnly) {
+	          //and the second block after READY_TO_PREPARE_EXECUTE until EXECUTED, because there can be a time lapse between
+	          //the first and the second block if the kernel is compiled but not executed at once.
+              for (int i = ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal() + 1; i < NUM_EVENTS; ++i) {
+                 long elapsed = currentTimes[i] - currentTimes[i - 1];
+                 if (elapsed < 0) {
+                    logger.log(Level.SEVERE, "negative elapsed time for event " + event);
+                    break;
+                 }
+                 accumulatedTimes[i] += elapsed;
+              }	          
 	          globalAcc.accumulateTimes(currentTimes);
 	          lastAccumulator.set(this);
 	       }
+	       	       
+	       lastEvent = updateValueForLastEvent;
 	   }
 	   
-	   private void onEvent(final ProfilingEvent event) {
-		  parseStartEventHelper(event);
-			
-	      lastEvent = event;
-	      if (event == ProfilingEvent.EXECUTED) {
+	   private void onEvent(final ProfilingEvent event, boolean compileOnly) {
+		  parseStartEventHelper(event, compileOnly);
+				      
+	      if (event == ProfilingEvent.EXECUTED && !compileOnly) {
 	    	 updateProfileReport(report, invocationCount, currentTimes);
 	    	 IProfileReportObserver observer = parentKernelProfile.getReportObserver();
 	         lastEvent = null;
@@ -186,8 +226,8 @@ public class KernelDeviceProfile {
       format.setMaximumFractionDigits(3);
    }
 
-   public void onEvent(ProfilingEvent event) {
-	   getAccForThreadPutIfAbsent().onEvent(event);
+   public void onEvent(ProfilingEvent event, boolean compileOnly) {
+	   getAccForThreadPutIfAbsent().onEvent(event, compileOnly);
    }
 
    private ProfileReport updateProfileReport(final ProfileReport report, long invocationCount, long[] currentTimes) {
@@ -206,6 +246,10 @@ public class KernelDeviceProfile {
            return 0;    
 	   }
 	   
+       if (stage == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           return 0.0;
+       }
+	   
 	   Accumulator acc = getAccForThread();
 
 	   return acc == null ? Double.NaN : (acc.currentTimes[stage] - acc.currentTimes[stage - 1]) / MILLION;
@@ -215,7 +259,17 @@ public class KernelDeviceProfile {
    public double getElapsedTimeCurrentThread(int from, int to) {
 	   Accumulator acc = getAccForThread();
 
-	   return acc == null ? Double.NaN : (acc.currentTimes[to] - acc.currentTimes[from]) / MILLION;
+	   if (acc == null) {
+	       return Double.NaN;
+	   }
+	   
+       double accum = 0.0;
+       if (from < ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal() && to >= ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           accum = (double)(acc.currentTimes[ProfilingEvent.OPENCL_COMPILED.ordinal()] - acc.currentTimes[from]) / MILLION;
+           accum += (double)(acc.currentTimes[to] - acc.currentTimes[ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()]) / MILLION;
+           return accum;
+       }
+       return (double)(acc.currentTimes[to] - acc.currentTimes[from]) / MILLION;	   
    }
    
    /**
@@ -257,6 +311,10 @@ public class KernelDeviceProfile {
    public double getCumulativeElapsedTimeCurrrentThread(ProfilingEvent stage) {
 	   Accumulator acc = getAccForThread();
 
+	   if (stage == ProfilingEvent.READY_TO_PREPARE_EXECUTE) {
+	       return 0.0;
+	   }
+	   
 	   return acc == null ? Double.NaN : acc.accumulatedTimes[stage.ordinal()] / MILLION;
    }
    
@@ -273,6 +331,11 @@ public class KernelDeviceProfile {
 	  }
 
       for (int i = 1; i <= ProfilingEvent.EXECUTED.ordinal(); ++i) {
+         if (i == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+             //Ready to prepare execute is a stage that never takes time it is just a partial start time
+             //reference point.             
+             continue;
+         }
          sum += acc.accumulatedTimes[i];
       }
       
@@ -289,8 +352,14 @@ public class KernelDeviceProfile {
            return 0;    
 	   }
 	   
-	   Accumulator acc = lastAccumulator.get();
-
+	   if (stage == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           //Ready to prepare execute is a stage that never takes time it is just a partial start time
+           //reference point.
+           return 0.0;
+       }
+	   
+	   Accumulator acc = lastAccumulator.get();	   
+	   
 	   return acc == null ? Double.NaN : (acc.currentTimes[stage] - acc.currentTimes[stage - 1]) / MILLION;
    }
    
@@ -304,7 +373,17 @@ public class KernelDeviceProfile {
    public double getElapsedTimeLastThread(int from, int to) {
 	   Accumulator acc = lastAccumulator.get();
 	   
-	   return acc == null ? Double.NaN : (acc.currentTimes[to] - acc.currentTimes[from]) / MILLION;
+	   if (acc == null) {
+	       return Double.NaN;	       
+	   }
+	   
+       double accum = 0.0;
+       if (from < ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal() && to >= ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+           accum = (double)(acc.currentTimes[ProfilingEvent.OPENCL_COMPILED.ordinal()] - acc.currentTimes[from]) / MILLION;
+           accum += (double)(acc.currentTimes[to] - acc.currentTimes[ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()]) / MILLION;
+           return accum;
+       }
+       return (double)(acc.currentTimes[to] - acc.currentTimes[from]) / MILLION;
    }
       
    /** 
@@ -315,6 +394,12 @@ public class KernelDeviceProfile {
     */
    public double getCumulativeElapsedTimeGlobal(ProfilingEvent stage) {
 	   final long[] accumulatedTimesHolder = new long[NUM_EVENTS];
+	   if (stage == ProfilingEvent.READY_TO_PREPARE_EXECUTE) {
+           //Ready to prepare execute is a stage that never takes time it is just a partial start time
+           //reference point.
+           return 0.0;
+       }
+	   
 	   globalAcc.consultAccumulatedTimes(accumulatedTimesHolder);
 
 	   return accumulatedTimesHolder[stage.ordinal()] / MILLION;
@@ -330,6 +415,11 @@ public class KernelDeviceProfile {
 
       double sum = 0;
       for (int i = 1; i <= ProfilingEvent.EXECUTED.ordinal(); ++i) {
+         if (i == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+             //Ready to prepare execute is a stage that never takes time it is just a partial start time
+             //reference point.
+             continue;
+         }          
          sum += accumulatedTimesHolder[i];
       }
       return sum;
@@ -337,10 +427,9 @@ public class KernelDeviceProfile {
 
    public static synchronized String getTableHeader() {
       if (tableHeader == null) {
-         int length = ProfilingEvent.values().length;
          StringBuilder builder = new StringBuilder(150);
          appendRowHeaders(builder, "Device", "Count");
-         for (int i = 1; i < length; ++i) {
+         for (int i = 1; i < NUM_EVENTS; ++i) {             
             ProfilingEvent stage = ProfilingEvent.values()[i];
             String heading = stage.name();
             appendCell(builder, heading);
@@ -364,10 +453,12 @@ public class KernelDeviceProfile {
 	 
       double total = 0;
       appendRowHeaders(builder, device.getShortDescription(), String.valueOf(invocationCountGlobal.get()));
-      for (int i = 1; i < NUM_EVENTS; ++i) {
+      for (int i = 1; i < NUM_EVENTS; ++i) {         
          ProfilingEvent stage = ProfilingEvent.values()[i];
          double time = getElapsedTimeLastThread(stage.ordinal());
-         total += time;
+         if (i != ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+            total += time;
+         } 
          String formatted = format.format(time);
          appendCell(builder, formatted);
       }
diff --git a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java
index 933b3f2bb7d3a79c2c31037674963f1109d76b75..966539e1430cfd3694381109d225f59f30e0a137 100644
--- a/src/main/java/com/aparapi/internal/kernel/KernelProfile.java
+++ b/src/main/java/com/aparapi/internal/kernel/KernelProfile.java
@@ -71,9 +71,11 @@ public class KernelProfile {
    /**
     * Starts a profiling information gathering sequence for the current thread invoking this method
     * regarding the specified execution device.
-    * @param device
+    * @param device the device to which the report pertains
+    * @param compileOnly <ul><li>true, if this event is from a compile only dry run</li>
+    *                        <li>false, if this event is from a regular kernel run</li></ul>
     */
-   void onStart(Device device) {
+   void onStart(Device device, boolean compileOnly) {
 	  KernelDeviceProfile currentDeviceProfile = deviceProfiles.get(device);
       if (currentDeviceProfile == null) {    	 
          currentDeviceProfile = new KernelDeviceProfile(this, kernelClass, device);
@@ -83,7 +85,7 @@ public class KernelProfile {
          }
       }
       
-      currentDeviceProfile.onEvent(ProfilingEvent.START);
+      currentDeviceProfile.onEvent(ProfilingEvent.START, compileOnly);
       currentDevice.set(device);
    }
 
@@ -93,25 +95,28 @@ public class KernelProfile {
     * 
     * @param device the device where the kernel is/was executed
     * @param event the event for which the profiling information is being updated
+    * @param compileOnly <ul><li>true, if this event is from a compile only dry run</li>
+    *                        <li>false, if this event is from a regular kernel run</li></ul>
     */
-   void onEvent(Device device, ProfilingEvent event) {
+   void onEvent(Device device, ProfilingEvent event, boolean compileOnly) {
 	  if (event == null) {
 		  logger.log(Level.WARNING, "Discarding profiling event " + event + " for null device, for Kernel class: " + kernelClass.getName());
 		  return;
 	  }
 	  final KernelDeviceProfile deviceProfile = deviceProfiles.get(device);
       switch (event) {
-         case CLASS_MODEL_BUILT: // fallthrough
-         case OPENCL_GENERATED:  // fallthrough
-         case INIT_JNI:          // fallthrough
-         case OPENCL_COMPILED:   // fallthrough
-         case PREPARE_EXECUTE:   // fallthrough
-         case EXECUTED:          // fallthrough
+         case CLASS_MODEL_BUILT:        // fallthrough
+         case OPENCL_GENERATED:         // fallthrough
+         case INIT_JNI:                 // fallthrough
+         case OPENCL_COMPILED:          // fallthrough
+         case READY_TO_PREPARE_EXECUTE: // fallthrough
+         case PREPARE_EXECUTE:          // fallthrough
+         case EXECUTED:                 // fallthrough
          {
             if (deviceProfile == null) {
                logger.log(Level.SEVERE, "Error in KernelProfile, no currentDevice (synchronization error?");
             }
-            deviceProfile.onEvent(event);
+            deviceProfile.onEvent(event, compileOnly);
             break;
          }
          case START:
diff --git a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
index 25975a2a1794c648ce07783092536adaf2a4d857..5065ccaa1bcad81ef8586bd1231de195559c65a4 100644
--- a/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
+++ b/src/main/java/com/aparapi/internal/kernel/KernelRunner.java
@@ -478,7 +478,7 @@ public class KernelRunner extends KernelRunnerJNI{
       boolean legacySequentialMode = kernel.getExecutionMode().equals(Kernel.EXECUTION_MODE.SEQ);
 
       passId = PASS_ID_PREPARING_EXECUTION;
-      _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE);
+      _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE, false);
 
       try {
          if (device == JavaDevice.ALTERNATIVE_ALGORITHM) {
@@ -1370,15 +1370,15 @@ public class KernelRunner extends KernelRunnerJNI{
          Range result;
          switch (_settings.range.getDims()) {
             case 1: {
-               result = Range.create(device, _settings.range.getGlobalSize_0());
+               result = Range.create(kernel, device, _settings.range.getGlobalSize_0());
                break;
             }
             case 2: {
-               result = Range.create2D(device, _settings.range.getGlobalSize_0(), _settings.range.getGlobalSize_1());
+               result = Range.create2D(kernel, device, _settings.range.getGlobalSize_0(), _settings.range.getGlobalSize_1());
                break;
             }
             case 3: {
-               result = Range.create3D(device, _settings.range.getGlobalSize_0(), _settings.range.getGlobalSize_1(), _settings.range.getGlobalSize_2());
+               result = Range.create3D(kernel, device, _settings.range.getGlobalSize_0(), _settings.range.getGlobalSize_1(), _settings.range.getGlobalSize_2());
                break;
             }
             default: {
@@ -1401,7 +1401,7 @@ public class KernelRunner extends KernelRunnerJNI{
    @SuppressWarnings("deprecation")
    synchronized private Kernel fallBackToNextDevice(Device device, ExecutionSettings _settings, Exception _exception, boolean _silently) {
       isFallBack = true;
-      _settings.profile.onEvent(device, ProfilingEvent.EXECUTED);
+      _settings.profile.onEvent(device, ProfilingEvent.EXECUTED, false);
       if (_settings.legacyExecutionMode) {
          if (!_silently && logger.isLoggable(Level.WARNING)) {
             logger.warning("Execution mode " + kernel.getExecutionMode() + " failed for " + kernel + ": " + _exception.getMessage());
@@ -1461,9 +1461,9 @@ public class KernelRunner extends KernelRunnerJNI{
    public synchronized Kernel compile(String _entrypoint, final Device device) throws CompileFailedException {
       KernelProfile profile = KernelManager.instance().getProfile(kernel.getClass());
       KernelPreferences preferences = KernelManager.instance().getPreferences(kernel);
-      Range range = new Range(device, 1);
-	   ExecutionSettings settings = new ExecutionSettings(preferences, profile, _entrypoint, range, 1, false);
-	   return executeInternalInner(settings, device, true);
+      Range range = Range.create((Kernel)null, device, 1);
+	  ExecutionSettings settings = new ExecutionSettings(preferences, profile, _entrypoint, range, 1, false);
+	  return executeInternalInner(settings, device, true);
    }
 
    private synchronized Kernel executeInternalOuter(ExecutionSettings _settings) {
@@ -1571,7 +1571,7 @@ public class KernelRunner extends KernelRunnerJNI{
             device = openCLDevice;
          }
          assert device != null : "No device available";
-         _settings.profile.onStart(device);
+         _settings.profile.onStart(device, compileOnly);
          /* for backward compatibility reasons we still honor execution mode */
          boolean isOpenCl = requestedExecutionMode.isOpenCL() || device instanceof OpenCLDevice;
          if (isOpenCl) {
@@ -1580,9 +1580,9 @@ public class KernelRunner extends KernelRunnerJNI{
                   try {
                      final ClassModel classModel = ClassModel.createClassModel(kernel.getClass());
                      entryPoint = classModel.getEntrypoint(_settings.entrypoint, kernel);
-                     _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT);
+                     _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT, false);
                   } catch (final Exception exception) {
-                     _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT);
+                     _settings.profile.onEvent(device, ProfilingEvent.CLASS_MODEL_BUILT, false);
                      if (compileOnly) {
                         //Cannot fallback in compile only mode
                         throw new CompileFailedException(exception);
@@ -1602,7 +1602,7 @@ public class KernelRunner extends KernelRunnerJNI{
                      // Init the device to check capabilities before emitting the
                      // code that requires the capabilities.
                      jniContextHandle = initJNI(kernel, openCLDevice, jniFlags); // openCLDevice will not be null here
-                     _settings.profile.onEvent(device, ProfilingEvent.INIT_JNI);
+                     _settings.profile.onEvent(device, ProfilingEvent.INIT_JNI, false);
                   } // end of synchronized! issue 68
 
                   if (jniContextHandle == 0) {
@@ -1661,12 +1661,12 @@ public class KernelRunner extends KernelRunnerJNI{
                            else if (Config.enableShowGeneratedOpenCL) {
                               System.out.println(openCL);
                            }
-                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED);
+                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED, compileOnly);
                            openCLCache.put(kernel.getClass(), openCL);
                         }
                         catch (final CodeGenException codeGenException) {
                            openCLCache.put(kernel.getClass(), CODE_GEN_ERROR_MARKER);
-                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED);
+                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED, compileOnly);
                            if (compileOnly) {
                               throw new CompileFailedException(codeGenException);
                            }
@@ -1675,7 +1675,7 @@ public class KernelRunner extends KernelRunnerJNI{
                      }
                      else {
                         if (openCL.equals(CODE_GEN_ERROR_MARKER)) {
-                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED);
+                           _settings.profile.onEvent(device, ProfilingEvent.OPENCL_GENERATED, compileOnly);
                            boolean silently = true; // since we must have already reported the CodeGenException
                            if (compileOnly) {
                               throw new CompileFailedException("Code Gen Error Marker present");
@@ -1705,7 +1705,7 @@ public class KernelRunner extends KernelRunnerJNI{
                         }
                      }
                   }
-                  _settings.profile.onEvent(device, ProfilingEvent.OPENCL_COMPILED);
+                  _settings.profile.onEvent(device, ProfilingEvent.OPENCL_COMPILED, compileOnly);
                   if (handle == 0) {
                      if (compileOnly) {
                         //When compiling a kernel for a specific device device fallback is not allowed
@@ -1720,8 +1720,9 @@ public class KernelRunner extends KernelRunnerJNI{
                      return kernel;
                   }
                }
-                  
-               if (entryPoint != null) {
+               
+               _settings.profile.onEvent(device, ProfilingEvent.READY_TO_PREPARE_EXECUTE, compileOnly);
+               if (entryPoint != null) {                  
                   //Pre-compiled kernels that never executed must resume here 
                   args = new KernelArg[entryPoint.getReferencedFields().size()];
                   int i = 0;
@@ -1848,7 +1849,7 @@ public class KernelRunner extends KernelRunnerJNI{
                   argc = i;
 
                   setArgsJNI(jniContextHandle, args, argc);
-                  _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE);
+                  _settings.profile.onEvent(device, ProfilingEvent.PREPARE_EXECUTE, false);
                   
                   kernelNeverExecutedForDeviceHash.putIfAbsent(device, false);
                   try {
@@ -1885,7 +1886,7 @@ public class KernelRunner extends KernelRunnerJNI{
          return kernel;
       }
       finally {
-         _settings.profile.onEvent(device, ProfilingEvent.EXECUTED);
+         _settings.profile.onEvent(device, ProfilingEvent.EXECUTED, compileOnly);
          maybeReportProfile(_settings);
       }
    }
diff --git a/src/main/java/com/aparapi/internal/kernel/ProfilingEvent.java b/src/main/java/com/aparapi/internal/kernel/ProfilingEvent.java
index 24a378dbc17164f2c6b383d8929f7819aa39f917..63e62289d41b54a2bed2378878408f5f54fbacc1 100644
--- a/src/main/java/com/aparapi/internal/kernel/ProfilingEvent.java
+++ b/src/main/java/com/aparapi/internal/kernel/ProfilingEvent.java
@@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicReference;
  * Created by Barney on 02/09/2015.
  */
 public enum ProfilingEvent {
-   START, CLASS_MODEL_BUILT, INIT_JNI, OPENCL_GENERATED, OPENCL_COMPILED, PREPARE_EXECUTE, EXECUTED;
+   START, CLASS_MODEL_BUILT, INIT_JNI, OPENCL_GENERATED, OPENCL_COMPILED, READY_TO_PREPARE_EXECUTE, PREPARE_EXECUTE, EXECUTED;
 
 	
    static final AtomicReference<String[]> stagesNames = new AtomicReference<String[]>(null);
diff --git a/src/test/java/com/aparapi/runtime/AtomicsSupportAdvTest.java b/src/test/java/com/aparapi/runtime/AtomicsSupportAdvTest.java
index 94a8a62cab03ce6708cf92c025b8e939a158e2a5..82c5cbc6a05c7151bd83675d13b47f2e9a7ebe29 100644
--- a/src/test/java/com/aparapi/runtime/AtomicsSupportAdvTest.java
+++ b/src/test/java/com/aparapi/runtime/AtomicsSupportAdvTest.java
@@ -94,7 +94,7 @@ public class AtomicsSupportAdvTest {
     	
         final AtomicKernel kernel = new AtomicKernel(in, out);
         try {
-	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        final Range range = openCLDevice.createRange(kernel, SIZE/2, SIZE/2);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -121,7 +121,7 @@ public class AtomicsSupportAdvTest {
     	
         final AtomicKernel kernel = new AtomicKernel(in, out);
         try {
-	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        final Range range = openCLDevice.createRange(kernel, SIZE/2, SIZE/2);
 	        kernel.execute(range);
         } finally {
         	kernel.dispose();
@@ -147,7 +147,7 @@ public class AtomicsSupportAdvTest {
     	
         final AtomicKernel kernel = new AtomicKernel(in, out);
         try {
-	        final Range range = device.createRange(SIZE/2, SIZE/2);
+	        final Range range = device.createRange(kernel, SIZE/2, SIZE/2);
 	        kernel.execute(range);
         } finally {
         	kernel.dispose();
@@ -172,7 +172,7 @@ public class AtomicsSupportAdvTest {
     	
         final AtomicBKernel kernel = new AtomicBKernel(in, out);
         try {
-	        final Range range = openCLDevice.createRange(SIZE/2, SIZE/2);
+	        final Range range = openCLDevice.createRange(kernel, SIZE/2, SIZE/2);
 	        kernel.execute(range);
         } finally {
         	kernel.dispose();
@@ -200,7 +200,7 @@ public class AtomicsSupportAdvTest {
     	
         final AtomicBKernel kernel = new AtomicBKernel(in, out);
         try {
-	        final Range range = device.createRange(SIZE/2, SIZE/2);
+	        final Range range = device.createRange(kernel, SIZE/2, SIZE/2);
 	        kernel.execute(range);
         } finally {
         	kernel.dispose();
diff --git a/src/test/java/com/aparapi/runtime/AtomicsSupportTest.java b/src/test/java/com/aparapi/runtime/AtomicsSupportTest.java
index b7aca3cc7210163644e4020652f61f7e4b1074e3..7bc952a9260416eece81a1d33aa40415c6705454 100644
--- a/src/test/java/com/aparapi/runtime/AtomicsSupportTest.java
+++ b/src/test/java/com/aparapi/runtime/AtomicsSupportTest.java
@@ -83,7 +83,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAdd kernel = new AtomicAdd(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -104,7 +104,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAdd kernel = new AtomicAdd(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -124,7 +124,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAdd kernel = new AtomicAdd(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -171,7 +171,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicSub kernel = new AtomicSub(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -192,7 +192,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicSub kernel = new AtomicSub(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -212,7 +212,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicSub kernel = new AtomicSub(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -261,7 +261,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXchg kernel = new AtomicXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -282,7 +282,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXchg kernel = new AtomicXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -302,7 +302,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXchg kernel = new AtomicXchg(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -349,7 +349,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicInc kernel = new AtomicInc(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -369,7 +369,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicInc kernel = new AtomicInc(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -388,7 +388,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicInc kernel = new AtomicInc(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -435,7 +435,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicDec kernel = new AtomicDec(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -455,7 +455,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicDec kernel = new AtomicDec(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -474,7 +474,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicDec kernel = new AtomicDec(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -523,7 +523,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -545,7 +545,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -566,7 +566,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -586,7 +586,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -608,7 +608,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -629,7 +629,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicCmpXchg kernel = new AtomicCmpXchg(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -677,7 +677,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -698,7 +698,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -718,7 +718,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -736,7 +736,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -757,7 +757,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -777,7 +777,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMin kernel = new AtomicMin(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -825,7 +825,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -846,7 +846,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -866,7 +866,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -884,7 +884,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -905,7 +905,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -925,7 +925,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicMax kernel = new AtomicMax(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -974,7 +974,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAnd kernel = new AtomicAnd(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -996,7 +996,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAnd kernel = new AtomicAnd(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -1016,7 +1016,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicAnd kernel = new AtomicAnd(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -1065,7 +1065,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicOr kernel = new AtomicOr(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -1086,7 +1086,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicOr kernel = new AtomicOr(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -1106,7 +1106,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicOr kernel = new AtomicOr(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -1154,7 +1154,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXor kernel = new AtomicXor(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.setExplicit(true);
 	        kernel.put(in);
 	        kernel.execute(range);
@@ -1175,7 +1175,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXor kernel = new AtomicXor(in, out);
     	try {
-	    	final Range range = openCLDevice.createRange(1,1);
+	    	final Range range = openCLDevice.createRange(kernel,1,1);
 	        kernel.execute(range);
     	} finally {
     		kernel.dispose();
@@ -1195,7 +1195,7 @@ public class AtomicsSupportTest {
     	
     	final AtomicXor kernel = new AtomicXor(in, out);
     	try {
-	    	final Range range = device.createRange(1,1);
+	    	final Range range = device.createRange(kernel,1,1);
 	    	kernel.execute(range);
     	} finally {
     		kernel.dispose();
diff --git a/src/test/java/com/aparapi/runtime/BarrierSupportTest.java b/src/test/java/com/aparapi/runtime/BarrierSupportTest.java
index bf5697a2026647e89fa39d0640dbd47098e51d11..c142520211318fd16ab27be10568cbabe77f6a76 100644
--- a/src/test/java/com/aparapi/runtime/BarrierSupportTest.java
+++ b/src/test/java/com/aparapi/runtime/BarrierSupportTest.java
@@ -94,7 +94,7 @@ public class BarrierSupportTest {
        }
 
         try {
-	         final Range range = openCLDevice.createRange(SIZE, SIZE);
+	         final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(false);
 	         kernel.setArray(targetArray);
@@ -124,7 +124,7 @@ public class BarrierSupportTest {
         }
 
         try {
-	         final Range range = openCLDevice.createRange(SIZE, SIZE);
+	         final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(true);
 	         kernel.setArray(targetArray);
@@ -146,7 +146,7 @@ public class BarrierSupportTest {
 
         final Barrrier1Kernel kernel = new Barrrier1Kernel(SIZE);
         try {
-	         final Range range = device.createRange(SIZE, SIZE);
+	         final Range range = device.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(false);
 	         kernel.setArray(targetArray);
@@ -176,7 +176,7 @@ public class BarrierSupportTest {
         }
 
         try {
-	         final Range range = openCLDevice.createRange(SIZE, SIZE);
+	         final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(false);
 	         kernel.setArray(targetArray);
@@ -206,7 +206,7 @@ public class BarrierSupportTest {
         }
         
         try {
-	         final Range range = openCLDevice.createRange(SIZE, SIZE);
+	         final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(true);
 	         kernel.setArray(targetArray);
@@ -228,7 +228,7 @@ public class BarrierSupportTest {
 
         final Barrrier2Kernel kernel = new Barrrier2Kernel(SIZE);
         try {
-	         final Range range = device.createRange(SIZE, SIZE);
+	         final Range range = device.createRange(kernel, SIZE, SIZE);
 	         targetArray = initInputArray();
 	         kernel.setExplicit(false);
 	         kernel.setArray(targetArray);
diff --git a/src/test/java/com/aparapi/runtime/BufferTransferTest.java b/src/test/java/com/aparapi/runtime/BufferTransferTest.java
index 8edf47a714822c88132f7e5c89e952897822596c..1467fc939fb8b3ac68465ec7300f3888aa2964f1 100644
--- a/src/test/java/com/aparapi/runtime/BufferTransferTest.java
+++ b/src/test/java/com/aparapi/runtime/BufferTransferTest.java
@@ -51,7 +51,7 @@ public class BufferTransferTest {
         if (maxSize < SIZE) {
            SIZE = maxSize;
         }
-        final Range range = openCLDevice.createRange(SIZE);
+        final Range range = openCLDevice.createRange(kernel, SIZE);
 
         kernel.in = new int[SIZE];
         kernel.out = new int[SIZE];
@@ -76,7 +76,7 @@ public class BufferTransferTest {
         if (maxSize < SIZE) {
            SIZE = maxSize;
         }
-        final Range range = openCLDevice.createRange(SIZE);
+        final Range range = openCLDevice.createRange(kernel, SIZE);
 
         kernel.values = new int[SIZE];
         kernel.result = new int[SIZE];
@@ -129,7 +129,7 @@ public class BufferTransferTest {
            SIZE = maxSize;
         }        
         kernel.setExplicit(true);
-        final Range range = openCLDevice.createRange(SIZE);
+        final Range range = openCLDevice.createRange(kernel, SIZE);
 
         kernel.values = new int[SIZE];
         kernel.result = new int[SIZE];
diff --git a/src/test/java/com/aparapi/runtime/IntArray2DTest.java b/src/test/java/com/aparapi/runtime/IntArray2DTest.java
index cdae400fc4dd8305dd0ba4119866c731da57e73a..03b4fea34ea62fa6f229e1cd789ddf20abd3a208 100644
--- a/src/test/java/com/aparapi/runtime/IntArray2DTest.java
+++ b/src/test/java/com/aparapi/runtime/IntArray2DTest.java
@@ -70,7 +70,7 @@ public class IntArray2DTest {
                 }
             };
             
-        final Range range = openCLDevice.createRange(size);
+        final Range range = openCLDevice.createRange(kernel, size);
         try {
             kernel.execute(range);
 
diff --git a/src/test/java/com/aparapi/runtime/JtpRangeIdsTest.java b/src/test/java/com/aparapi/runtime/JtpRangeIdsTest.java
index dbad276acca1c6c2e2c6dd61c1153a57d246f78d..791ec6dea98872a886fe37d51f172c943a539eb3 100644
--- a/src/test/java/com/aparapi/runtime/JtpRangeIdsTest.java
+++ b/src/test/java/com/aparapi/runtime/JtpRangeIdsTest.java
@@ -464,7 +464,7 @@ public class JtpRangeIdsTest {
     @Test
     public void test() {
         MatrixKernel kernel = new MatrixKernel();
-        kernel.execute(Range.create2D(12, 4, 4, 2));
+        kernel.execute(Range.create2D(kernel, 12, 4, 4, 2));
         for(boolean hasPassed : kernel.passed) {
             Assert.assertTrue("Resulting matrix was invalid", hasPassed);
         }
diff --git a/src/test/java/com/aparapi/runtime/KernelAndDeviceItemSizeLimitsTest.java b/src/test/java/com/aparapi/runtime/KernelAndDeviceItemSizeLimitsTest.java
index 98351d06cd6144f31243db91082bf72d4b371383..4f4a28b00aa2e31fe5199b6990195eb495f7abd7 100644
--- a/src/test/java/com/aparapi/runtime/KernelAndDeviceItemSizeLimitsTest.java
+++ b/src/test/java/com/aparapi/runtime/KernelAndDeviceItemSizeLimitsTest.java
@@ -86,7 +86,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Local Mem Size should be greater or equal to 0", maxLocalMemSize >= 0);
       
-      Range r = Range.create(openCLDevice, SIZE, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -111,7 +111,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Private Mem Size should be greater than 0", maxPrivateMemSize >= 0);
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -136,7 +136,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Kernel Workgroup Size should be greater than 0", maxWorkGroupSize > 0);
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -161,7 +161,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Preferred Kernel Workgroup Size Multiple should be greater than 0", preferredWorkGroupSizeMultiple > 0);
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -190,7 +190,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
          assertTrue("Kernel Compile Work Group Size should be greater or equal than zero at index=" + i, maxWorkItemSize[i] >= 0);
       }
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -222,7 +222,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Local Mem Size should be equal or greater to 0", maxLocalMemSize >= 0);
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -251,7 +251,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Private Mem Size should be equal or greater to 0", maxPrivateMemSize >= 0);
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -280,7 +280,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Max Kernel Workgroup Size should be equal or greater than 0", maxWorkGroupSize >= 0);
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -309,7 +309,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
       
       assertTrue("Preferred Kernel Workgroup Size Multiple should be equal to 1", preferredWorkGroupSizeMultiple == 1);
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -342,7 +342,7 @@ public class KernelAndDeviceItemSizeLimitsTest {
          assertTrue("Kernel Compile Work Group Size should be greater or equal than zero at index=" + i, maxWorkItemSize[i] >= 0);
       }
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
diff --git a/src/test/java/com/aparapi/runtime/KernelCompileOnlyTest.java b/src/test/java/com/aparapi/runtime/KernelCompileOnlyTest.java
index c921a8bf519b950c517574b68f1953310036578d..5f8f19082951caf15ce870b078bcc2c6d9af8cd4 100644
--- a/src/test/java/com/aparapi/runtime/KernelCompileOnlyTest.java
+++ b/src/test/java/com/aparapi/runtime/KernelCompileOnlyTest.java
@@ -98,7 +98,7 @@ public class KernelCompileOnlyTest {
          fail("This shouldn't happen");
       }
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -126,7 +126,7 @@ public class KernelCompileOnlyTest {
          fail("This shouldn't happen");
       }
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -146,7 +146,7 @@ public class KernelCompileOnlyTest {
       }
       
       
-      Range r = Range.create(openCLDevice, SIZE);
+      Range r = Range.create(myKernel, openCLDevice, SIZE);
       myKernel.execute(r);
       
       int[] results = myKernel.getResults();
@@ -209,7 +209,7 @@ public class KernelCompileOnlyTest {
          fail("This shouldn't happen");
       }
       
-      Range r = Range.create(device, SIZE, SIZE);
+      Range r = Range.create(myKernel, device, SIZE, SIZE);
       myKernel.execute(r);
       
       int[] resultsB = myKernel.getResults();
diff --git a/src/test/java/com/aparapi/runtime/LoadClTest.java b/src/test/java/com/aparapi/runtime/LoadClTest.java
index 0f6c126fe5d61567c75911b384dfc3cb3add14cc..fad93caab1a7ff969f8a86914284233760f831ea 100644
--- a/src/test/java/com/aparapi/runtime/LoadClTest.java
+++ b/src/test/java/com/aparapi/runtime/LoadClTest.java
@@ -39,7 +39,6 @@ public class LoadClTest {
 
         final float[] squares = new float[size];
         final float[] quads = new float[size];
-        final Range range = Range.create(size);
 
         final Device device = KernelManager.instance().bestDevice();
 
@@ -47,6 +46,7 @@ public class LoadClTest {
             final OpenCLDevice openclDevice = (OpenCLDevice) device;
 
             final Squarer squarer = openclDevice.bind(Squarer.class);
+            final Range range = Range.create(squarer, openclDevice, size);
             squarer.square(range, in, squares);
 
             for (int i = 0; i < size; i++) {
diff --git a/src/test/java/com/aparapi/runtime/LocalArrayArgsTest.java b/src/test/java/com/aparapi/runtime/LocalArrayArgsTest.java
index 2ea3ba7b4318ea6d816a3e1cf44b1898b665293e..96a757542bb051da3ed9b2c659e5fa23c131b762 100644
--- a/src/test/java/com/aparapi/runtime/LocalArrayArgsTest.java
+++ b/src/test/java/com/aparapi/runtime/LocalArrayArgsTest.java
@@ -50,7 +50,7 @@ public class LocalArrayArgsTest {
     public void test() {
         final LocalArrayArgsKernel kernel = new LocalArrayArgsKernel();
         try {
-	        final Range range = openCLDevice.createRange(SIZE, SIZE);
+	        final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	        targetArray = new int[SIZE];
 	        kernel.setExplicit(false);
 	        kernel.setArray(targetArray);
@@ -65,7 +65,7 @@ public class LocalArrayArgsTest {
     public void testExplicit() {
         final LocalArrayArgsKernel kernel = new LocalArrayArgsKernel();
         try {
-	        final Range range = openCLDevice.createRange(SIZE, SIZE);
+	        final Range range = openCLDevice.createRange(kernel, SIZE, SIZE);
 	        targetArray = new int[SIZE];
 	        kernel.setExplicit(true);
 	        kernel.setArray(targetArray);
diff --git a/src/test/java/com/aparapi/runtime/LocalAtomicVariableArrayTest.java b/src/test/java/com/aparapi/runtime/LocalAtomicVariableArrayTest.java
index 6f70f16cbe7fdd4057d3d272430892743331d46f..46403eb2b38a2fba763cad14d14f387d3b617dc2 100644
--- a/src/test/java/com/aparapi/runtime/LocalAtomicVariableArrayTest.java
+++ b/src/test/java/com/aparapi/runtime/LocalAtomicVariableArrayTest.java
@@ -74,7 +74,7 @@ public class LocalAtomicVariableArrayTest {
     @Test
     public void simpleConstIndexOpenCLTest() {
         SimpleConstIndexLocalVarKernel myKernel = new SimpleConstIndexLocalVarKernel();
-        Range range = openCLDevice.createRange(SIZE, SIZE);
+        Range range = openCLDevice.createRange(myKernel, SIZE, SIZE);
         try {
             myKernel.execute(range);
             assertEquals("Atomic increment doesn't match, index 1", SIZE, myKernel.atomics[1].get());
@@ -88,7 +88,7 @@ public class LocalAtomicVariableArrayTest {
     @Test
     public void simpleVarIndexOpenCLTest() {
         SimpleVarIndexLocalVarKernel myKernel = new SimpleVarIndexLocalVarKernel();
-        Range range = openCLDevice.createRange(SIZE, SIZE);
+        Range range = openCLDevice.createRange(myKernel, SIZE, SIZE);
         try {
             myKernel.execute(range);
             assertEquals("Atomic increment doesn't match", SIZE, myKernel.atomics[4].get());
diff --git a/src/test/java/com/aparapi/runtime/MultiDimensionalLocalArrayTest.java b/src/test/java/com/aparapi/runtime/MultiDimensionalLocalArrayTest.java
index 4a88236ceb0828acdce52d690f795f55755fb464..aed6736b1b54f3e65dacd5206bf902d956ff8548 100644
--- a/src/test/java/com/aparapi/runtime/MultiDimensionalLocalArrayTest.java
+++ b/src/test/java/com/aparapi/runtime/MultiDimensionalLocalArrayTest.java
@@ -116,7 +116,7 @@ public class MultiDimensionalLocalArrayTest
             }
         };
         try {
-        	kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+        	kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
         } finally {
         	kernel.dispose();
         }
@@ -152,9 +152,9 @@ public class MultiDimensionalLocalArrayTest
             }
         };
         try {
-        	kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+        	kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
         	assertEquals(3840, RESULT[0], 1E-6F);
-        	kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+        	kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
         	assertEquals(3840, RESULT[0], 1E-6F);
         } finally {
         	kernel.dispose();
@@ -190,7 +190,7 @@ public class MultiDimensionalLocalArrayTest
             }
         };
         try {
-        	kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+        	kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
         } finally {
         	kernel.dispose();
         }
@@ -227,9 +227,9 @@ public class MultiDimensionalLocalArrayTest
         };
         
         try {
-	        kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+	        kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
 	        assertEquals(3840, RESULT[0][0], 1E-6F);
-	        kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+	        kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
 	        assertEquals(3840, RESULT[0][0], 1E-6F);
         } finally {
         	kernel.dispose();
@@ -321,10 +321,10 @@ public class MultiDimensionalLocalArrayTest
         try {
         	kernel.setResult(RESULT);
         	kernel.setArray(SIZE, new float[SIZE*SIZE]);
-        	kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+        	kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
         	assertEquals(448, RESULT[0], 1E-6F);
         	kernel.setArray(2*SIZE, new float[2*SIZE*2*SIZE]);
-        	kernel.execute(Range.create2D(device, 2*SIZE, 2*SIZE, 2*SIZE, 2*SIZE));
+        	kernel.execute(Range.create2D(kernel, device, 2*SIZE, 2*SIZE, 2*SIZE, 2*SIZE));
         	assertTrue("Result is not greater than 448", RESULT[0]>448);
         } finally {
         	kernel.dispose();
@@ -342,10 +342,10 @@ public class MultiDimensionalLocalArrayTest
         try {
         	kernel.setResult(RESULT);
         	kernel.setArray(SIZE, new float[SIZE][SIZE]);
-	        kernel.execute(Range.create2D(device, SIZE, SIZE, SIZE, SIZE));
+	        kernel.execute(Range.create2D(kernel, device, SIZE, SIZE, SIZE, SIZE));
 	        assertEquals(448, RESULT[0], 1E-6F);
 	        kernel.setArray(2*SIZE, new float[2*SIZE][2*SIZE]);
-	        kernel.execute(Range.create2D(device, 2*SIZE, 2*SIZE, 2*SIZE, 2*SIZE));
+	        kernel.execute(Range.create2D(kernel, device, 2*SIZE, 2*SIZE, 2*SIZE, 2*SIZE));
 	        assertTrue("Result is not greater than 448", RESULT[0]>448);
         } finally {
         	kernel.dispose();
diff --git a/src/test/java/com/aparapi/runtime/MultiplePassesMemoryConsumptionTest.java b/src/test/java/com/aparapi/runtime/MultiplePassesMemoryConsumptionTest.java
index 0cffdad1f5b2eff9b5ba6bd6770caa41047f077a..cc78655b6364bd5d6608e2be45f01d5839e2ca4f 100644
--- a/src/test/java/com/aparapi/runtime/MultiplePassesMemoryConsumptionTest.java
+++ b/src/test/java/com/aparapi/runtime/MultiplePassesMemoryConsumptionTest.java
@@ -37,7 +37,7 @@ public class MultiplePassesMemoryConsumptionTest {
             System.gc();
             if( baseFree > Runtime.getRuntime().freeMemory())
                 baseFree = Runtime.getRuntime().freeMemory();
-            kernel.execute(Range.create(512, 64), 1);
+            kernel.execute(Range.create(kernel, 512, 64), 1);
             for (int i = 0; i < globalArray.length; ++i) {
                 Assert.assertEquals("Wrong", i, globalArray[i]);
             }
@@ -49,7 +49,7 @@ public class MultiplePassesMemoryConsumptionTest {
             System.gc();
             if( testFree > Runtime.getRuntime().freeMemory())
                 testFree = Runtime.getRuntime().freeMemory();
-            kernel.execute(Range.create(512, 64), 2);
+            kernel.execute(Range.create(kernel, 512, 64), 2);
             for (int i = 0; i < globalArray.length; ++i) {
                 Assert.assertEquals("Wrong", i, globalArray[i]);
             }
diff --git a/src/test/java/com/aparapi/runtime/NegativeIntegerTest.java b/src/test/java/com/aparapi/runtime/NegativeIntegerTest.java
index 8cd9a5405601e608c24796bf2b0884dbed8d3250..3a43c04404cfc044c24398e30b346df306fb5d45 100644
--- a/src/test/java/com/aparapi/runtime/NegativeIntegerTest.java
+++ b/src/test/java/com/aparapi/runtime/NegativeIntegerTest.java
@@ -78,7 +78,7 @@ public class NegativeIntegerTest
                 RESULT[0] = -800;
             }
         };
-        kernel.execute(Range.create(device, SIZE, SIZE));
+        kernel.execute(Range.create(kernel, device, SIZE, SIZE));
         assertEquals("Result doesn't match", -800, RESULT[0]);
     }
 }
diff --git a/src/test/java/com/aparapi/runtime/ProfileReportBackwardsCompatTest.java b/src/test/java/com/aparapi/runtime/ProfileReportBackwardsCompatTest.java
index c50894b3f71d0e3a6fb3c94d4bb5ede80c430e7f..262c33557769231bb737d13a42e8d2256fe573f4 100644
--- a/src/test/java/com/aparapi/runtime/ProfileReportBackwardsCompatTest.java
+++ b/src/test/java/com/aparapi/runtime/ProfileReportBackwardsCompatTest.java
@@ -104,7 +104,7 @@ public class ProfileReportBackwardsCompatTest {
     public void sequentialSingleThreadOpenCLTest() throws Exception {
     	setUpBefore();
     	logger.log(Level.INFO, "Test " + name.getMethodName() + " - Executing on device: " + openCLDevice.getShortDescription() + " - " + openCLDevice.getName());
-    	assertTrue(sequentialSingleThreadTestHelper(openCLDevice, 128));
+    	assertTrue(sequentialSingleThreadTestHelper(openCLDevice, 128, false));
     }
 
     /**
@@ -115,11 +115,11 @@ public class ProfileReportBackwardsCompatTest {
     public void sequentialSingleThreadJTPTest() {
     	KernelManager.setKernelManager(new JTPKernelManager());
     	Device device = KernelManager.instance().bestDevice();
-    	assertTrue(sequentialSingleThreadTestHelper(device, 16));
+    	assertTrue(sequentialSingleThreadTestHelper(device, 16, true));
     }
 
     
-    public boolean sequentialSingleThreadTestHelper(Device device, int size) {
+    public boolean sequentialSingleThreadTestHelper(Device device, int size, boolean isJTP) {
     	final int runs = 100;
     	final int inputArray[] = new int[size];
     	double accumulatedExecutionTime = 0.0;
@@ -128,8 +128,10 @@ public class ProfileReportBackwardsCompatTest {
     	final Basic1Kernel kernel = new Basic1Kernel();
     	
     	int[] outputArray = null;
-    	Range range = device.createRange(size, size);
-    	long startOfExecution = System.currentTimeMillis();
+    	
+    	long startOfExecution = System.nanoTime();
+    	//Range must be created here, to account for the Kernel compilation time, which is triggered on Range creation
+    	Range range = device.createRange(kernel, size, size);
     	try {
     		for (int i = 0; i < runs; i++) {
     			outputArray = Arrays.copyOf(inputArray, inputArray.length);
@@ -139,17 +141,18 @@ public class ProfileReportBackwardsCompatTest {
     			accumulatedExecutionTime += lastExecutionTime;
     			lastConversionTime = kernel.getConversionTime();
     		}
-    		long runTime = System.currentTimeMillis() - startOfExecution;
+    		double runTime = (double)(System.nanoTime() - startOfExecution) / 1000000.0;
     		WeakReference<ProfileReport> reportRef = kernel.getProfileReportLastThread(device);
     		ProfileReport report = reportRef.get();
     		assertEquals("Number of profiling reports doesn't match the expected", runs, report.getReportId());
     		assertEquals("Aparapi Accumulated execution time doesn't match", accumulatedExecutionTime, kernel.getAccumulatedExecutionTime(), 1e-10);
     		assertEquals("Aparapi last execution time doesn't match last report", lastExecutionTime, report.getExecutionTime(), 1e-10);
     		assertEquals("Aparapi last conversion time doesn't match last report", lastConversionTime, report.getConversionTime(), 1e-10);
-    		//FIXME This is a temporary workaround, however the time profiling should be accurately measured instead of relying on Java timer
-    		//Here we allow a 20% error margin for machines under heavy load during the test, where latency is higher
+    		//Here we allow a 15% error margin for machines under heavy load during the test, where latency is higher
     		//as well as, introduce a 250ms tolerance for fast machines for which the execution time is of the same order of the Java latency
-    		assertEquals("Test estimated accumulated time doesn't match within a 20% time window", runTime, accumulatedExecutionTime, 0.2f * runTime + 250);
+    		if (!isJTP) {
+    		   assertEquals("Test estimated accumulated time doesn't match within a 15% time window", runTime, accumulatedExecutionTime, 0.15f * runTime + 250);
+    		}
     		assertTrue(validateBasic1Kernel(inputArray, outputArray));
     	} finally {
     		kernel.registerProfileReportObserver(null);
@@ -165,7 +168,7 @@ public class ProfileReportBackwardsCompatTest {
     	private double lastExecutionTime = 0.0;
     	private double lastConversionTime = 0.0;
     	private long startOfExecution = 0;
-    	private long runTime = 0;
+    	private double runTime = 0;
     }
     
     /**
@@ -176,7 +179,7 @@ public class ProfileReportBackwardsCompatTest {
     public void threadedSingleThreadPerKernelOpenCLTest() throws Exception {
     	setUpBefore();
     	logger.log(Level.INFO, "Test " + name.getMethodName() + " - Executing on device: " + openCLDevice.getShortDescription() + " - " + openCLDevice.getName());
-    	assertTrue(threadedSingleThreadPerKernelTestHelper(openCLDevice, 128));
+    	assertTrue(threadedSingleThreadPerKernelTestHelper(openCLDevice, 128, false));
     }
     
     /**
@@ -186,10 +189,10 @@ public class ProfileReportBackwardsCompatTest {
     public void threadedSingleThreadPerKernelJTPTest() {
     	KernelManager.setKernelManager(new JTPKernelManager());
     	Device device = KernelManager.instance().bestDevice();
-    	assertTrue(threadedSingleThreadPerKernelTestHelper(device, 16));
+    	assertTrue(threadedSingleThreadPerKernelTestHelper(device, 16, true));
     }
     
-    public boolean threadedSingleThreadPerKernelTestHelper(Device device, final int size) {
+    public boolean threadedSingleThreadPerKernelTestHelper(Device device, final int size, boolean isJTP) {
     	final int runs = 100;
     	final int inputArray[] = new int[size];
     	
@@ -208,16 +211,16 @@ public class ProfileReportBackwardsCompatTest {
 	    	ExecutorService executorService = Executors.newFixedThreadPool(2);
 	    	try {
 				kernels.forEach(k -> executorService.submit(() -> {
-					results[k.getId() - 1].startOfExecution = System.currentTimeMillis();
+					results[k.getId() - 1].startOfExecution = System.nanoTime();
 					for (int i = 0; i < runs; i++) {
 						results[k.getId() - 1].outputArray = Arrays.copyOf(inputArray, inputArray.length);
 						k.setInputOuputArray(results[k.getId() - 1].outputArray);
-	    				k.execute(Range.create(device, size, size));
+	    				k.execute(Range.create(k, device, size, size));
 	    				results[k.getId() - 1].lastExecutionTime = k.getExecutionTime();
 	    				results[k.getId() - 1].accumulatedExecutionTime += results[k.getId() - 1].lastExecutionTime;
 	    				results[k.getId() - 1].lastConversionTime = k.getConversionTime();
 					}
-					results[k.getId() - 1].runTime = System.currentTimeMillis() - results[k.getId() - 1].startOfExecution;
+					results[k.getId() - 1].runTime = (System.nanoTime() - results[k.getId() - 1].startOfExecution) / 1000000.0;
 				}));
 	    	} finally {
 	    		executorService.shutdown();
@@ -241,10 +244,11 @@ public class ProfileReportBackwardsCompatTest {
     		assertEquals("Aparapi Accumulated execution time doesn't match", results[0].accumulatedExecutionTime, kernel1.getAccumulatedExecutionTime(), 1e-10);
     		assertEquals("Aparapi last execution time doesn't match last report", results[0].lastExecutionTime, report.getExecutionTime(), 1e-10);
     		assertEquals("Aparapi last conversion time doesn't match last report", results[0].lastConversionTime, report.getConversionTime(), 1e-10);
-                //FIXME This is a temporary workaround, however the time profiling should be accurately measured instead of relying on Java timer
-                //Here we allow a 20% error margin for machines under heavy load during the test, where latency is higher
-                //as well as, introduce a 250ms tolerance for fast machines for which the execution time is of the same order of the Java latency
-    		assertEquals("Test estimated accumulated time doesn't match within a 20% time window", results[0].runTime, results[0].accumulatedExecutionTime, 0.2f * results[0].runTime + 250);
+            //Here we allow a 10% error margin for machines under heavy load during the test, where latency is higher,
+            //and add a 250ms tolerance for fast machines, for which the execution time is of the same order as the Java latency
+    		if (!isJTP) {
+    		   assertEquals("Test estimated accumulated time doesn't match within a 10% time window", results[0].runTime, results[0].accumulatedExecutionTime, 0.1f * results[0].runTime + 250);
+    		}
     		assertTrue(validateBasic1Kernel(inputArray, results[0].outputArray));
     		
     		//Validate kernel2 reports
@@ -254,10 +258,11 @@ public class ProfileReportBackwardsCompatTest {
     		assertEquals("Aparapi Accumulated execution time doesn't match", results[1].accumulatedExecutionTime, kernel2.getAccumulatedExecutionTime(), 1e-10);
     		assertEquals("Aparapi last execution time doesn't match last report", results[1].lastExecutionTime, report.getExecutionTime(), 1e-10);
     		assertEquals("Aparapi last conversion time doesn't match last report", results[1].lastConversionTime, report.getConversionTime(), 1e-10);
-                //FIXME This is a temporary workaround, however the time profiling should be accurately measured instead of relying on Java timer
-                //Here we allow a 20% error margin for machines under heavy load during the test, where latency is higher
-                //as well as, introduce a 250ms tolerance for fast machines for which the execution time is of the same order of the Java latency
-    		assertEquals("Test estimated accumulated time doesn't match within a 20% time window", results[1].runTime, results[1].accumulatedExecutionTime, 0.2f * results[1].runTime + 250);
+            //Here we allow a 10% error margin for machines under heavy load during the test, where latency is higher,
+            //and add a 250ms tolerance for fast machines, for which the execution time is of the same order as the Java latency
+    		if (!isJTP) {
+    		   assertEquals("Test estimated accumulated time doesn't match within a 20% time window", results[1].runTime, results[1].accumulatedExecutionTime, 0.1f * results[1].runTime + 250);
+    		}
     		assertTrue(validateBasic2Kernel(inputArray, results[1].outputArray));
     	} finally {
     		kernel1.registerProfileReportObserver(null);
diff --git a/src/test/java/com/aparapi/runtime/ProfileReportNewAPITest.java b/src/test/java/com/aparapi/runtime/ProfileReportNewAPITest.java
index ceb1e6a416a0ffb02ba65c54983fdc5ddfb00a15..f27e4f5f0ee81b5108499cf959ed5fe41be4f76f 100644
--- a/src/test/java/com/aparapi/runtime/ProfileReportNewAPITest.java
+++ b/src/test/java/com/aparapi/runtime/ProfileReportNewAPITest.java
@@ -113,7 +113,7 @@ public class ProfileReportNewAPITest {
     public void singleThreadedSingleKernelObserverOpenCLTest() throws Exception {
     	setUpBefore();
     	logger.log(Level.INFO, "Test " + name.getMethodName() + " - Executing on device: " + openCLDevice.getShortDescription() + " - " + openCLDevice.getName());
-    	assertTrue(singleThreadedSingleKernelReportObserverTestHelper(openCLDevice, 128));
+    	assertTrue(singleThreadedSingleKernelReportObserverTestHelper(openCLDevice, 128, false));
     }
 
     /**
@@ -124,7 +124,7 @@ public class ProfileReportNewAPITest {
     public void singleThreadedSingleKernelObserverJTPTest() {
     	KernelManager.setKernelManager(new JTPKernelManager());
     	Device device = KernelManager.instance().bestDevice();
-    	assertTrue(singleThreadedSingleKernelReportObserverTestHelper(device, 16));
+    	assertTrue(singleThreadedSingleKernelReportObserverTestHelper(device, 16, true));
     }
 
     private class ThreadTestState {
@@ -172,13 +172,12 @@ public class ProfileReportNewAPITest {
 		}
     }
 
-    public boolean singleThreadedSingleKernelReportObserverTestHelper(Device device, int size) {
+    public boolean singleThreadedSingleKernelReportObserverTestHelper(Device device, int size, boolean isJTP) {
     	final int runs = 100;
     	final int inputArray[] = new int[size];
     	final Basic1Kernel kernel = new Basic1Kernel();
 
-    	int[] outputArray = null;
-    	Range range = device.createRange(size, size);
+    	int[] outputArray = null;    	
 
     	ReportObserver observer = new ReportObserver(device, 1, runs);
     	observer.addAcceptedThreadId(Thread.currentThread().getId());
@@ -188,21 +187,27 @@ public class ProfileReportNewAPITest {
 			assertFalse("Report with id " + i + " shouldn't have been received yet", observer.receivedReportIds[i]);
 		}
 
-    	long startOfExecution = System.currentTimeMillis();
+    	long startOfExecution = System.nanoTime();
+    	//Range must be created here, to account for the Kernel compilation time, which is triggered on Range creation
+    	Range range = device.createRange(kernel, size, size);
     	try {
     		for (int i = 0; i < runs; i++) {
     			outputArray = Arrays.copyOf(inputArray, inputArray.length);
     			kernel.setInputOuputArray(outputArray);
     			kernel.execute(range);
     		}
-    		long runTime = System.currentTimeMillis() - startOfExecution;
+    		double runTime = (System.nanoTime() - startOfExecution) / 1000000.0;
     		ConcurrentSkipListMap<Long, ThreadTestState> results = observer.getObservedThreadsIds();
     		ThreadTestState state = results.get(Thread.currentThread().getId());
     		assertNotNull("Reports should have been received for thread", state);
 
     		assertEquals("Number of profiling reports doesn't match the expected", runs, state.receivedReportsCount);
     		assertEquals("Aparapi Accumulated execution time doesn't match", kernel.getAccumulatedExecutionTimeAllThreads(device), state.accumulatedElapsedTime, 1e-10);
-    		// FIXME failing: assertEquals("Test estimated accumulated time doesn't match within 200ms window", runTime, kernel.getAccumulatedExecutionTimeAllThreads(device), 200);
+            //Here we allow a 10% error margin for machines under heavy load during the test, where latency is higher,
+            //and add a 250ms tolerance for fast machines, for which the execution time is of the same order as the Java latency
+    		if (!isJTP) {
+    		    assertEquals("Test estimated accumulated time doesn't match within a 10% time window", runTime, kernel.getAccumulatedExecutionTimeAllThreads(device), 0.1f * runTime + 250);
+    		}
     		for (int i = 0; i < runs; i++) {
     			assertTrue("Report with id " + i + " wasn't received", observer.receivedReportIds[i]);
     		}
@@ -223,7 +228,7 @@ public class ProfileReportNewAPITest {
     public void multiThreadedSingleKernelObserverOpenCLTest() throws Exception {
     	setUpBefore();
     	logger.log(Level.INFO, "Test " + name.getMethodName() + " - Executing on device: " + openCLDevice.getShortDescription() + " - " + openCLDevice.getName());
-    	assertTrue(multiThreadedSingleKernelReportObserverTestHelper(openCLDevice, 128));
+    	assertTrue(multiThreadedSingleKernelReportObserverTestHelper(openCLDevice, 128, false));
     }
 
     /**
@@ -234,11 +239,11 @@ public class ProfileReportNewAPITest {
     public void multiThreadedSingleKernelObserverJTPTest() throws Exception  {
     	KernelManager.setKernelManager(new JTPKernelManager());
     	Device device = KernelManager.instance().bestDevice();
-    	assertTrue(multiThreadedSingleKernelReportObserverTestHelper(device, 16));
+    	assertTrue(multiThreadedSingleKernelReportObserverTestHelper(device, 16, true));
     }
 
     private class ThreadResults {
-    	private long runTime;
+    	private double runTime;
     	private long threadId;
     	private int kernelCalls;
     	private double accumulatedExecutionTime;
@@ -260,15 +265,15 @@ public class ProfileReportNewAPITest {
 						int id = atomicResultId.getAndIncrement();
 						results[id].threadId = Thread.currentThread().getId();
 						observer.addAcceptedThreadId(results[id].threadId);
-						long startOfExecution = System.currentTimeMillis();
+						long startOfExecution = System.nanoTime();
 						results[id].kernelCalls = 0;
 						for (int i = 0; i < runs; i++) {
 							results[id].outputArray = Arrays.copyOf(inputArray, inputArray.length);
 							k.setInputOuputArray(results[id].outputArray);
-		    				k.execute(Range.create(device, size, size));
+		    				k.execute(Range.create(k, device, size, size));
 		    				results[id].kernelCalls++;
 						}
-						results[id].runTime = System.currentTimeMillis() - startOfExecution;
+						results[id].runTime = (System.nanoTime() - startOfExecution) / 1000000.0;
 						results[id].accumulatedExecutionTime = k.getAccumulatedExecutionTimeCurrentThread(device);
 					}
 				}));
@@ -295,7 +300,7 @@ public class ProfileReportNewAPITest {
     	return terminatedOk;
     }
 
-	public boolean multiThreadedSingleKernelReportObserverTestHelper(Device device, int size) throws InterruptedException, ExecutionException {
+	public boolean multiThreadedSingleKernelReportObserverTestHelper(Device device, int size, boolean isJTP) throws InterruptedException, ExecutionException {
     	final int runs = 100;
     	final int javaThreads = 10;
     	final int inputArray[] = new int[size];
@@ -329,20 +334,23 @@ public class ProfileReportNewAPITest {
     	ConcurrentSkipListMap<Long, ThreadTestState> states = observer.getObservedThreadsIds();
     	assertEquals("Number of Java threads sending profile reports should match the number of JavaThreads", javaThreads, states.values().size());
     	for (int i = 0; i < javaThreads; i++) {
-    		ThreadTestState state = states.get(results[i].threadId);
-    		assertNotNull("Report should have been received for thread with index " + i, state);
+    		ThreadTestState stateI = states.get(results[i].threadId);
+    		assertNotNull("Report should have been received for thread with index " + i, stateI);
     		assertEquals("Number of total iteration should match number of runs for thread with index " + i, runs, results[i].kernelCalls);
-        	assertEquals("Number of received reports should match total number of calls for thread with index " + i, runs, state.receivedReportsCount);
-        	assertEquals("Overall elapsed time received in reports doesn't match KernelDeviceProfile.Accumulator for threa with index " + i,
-        			results[i].accumulatedExecutionTime, state.accumulatedElapsedTime, 1e-10);
-        	allThreadsAccumulatedTime += state.accumulatedElapsedTime;
+        	assertEquals("Number of received reports should match total number of calls for thread with index " + i, runs, stateI.receivedReportsCount);
+        	assertEquals("Overall elapsed time received in reports doesn't match KernelDeviceProfile.Accumulator for thread with index " + i,
+        			results[i].accumulatedExecutionTime, stateI.accumulatedElapsedTime, 1e-10);
+        	allThreadsAccumulatedTime += stateI.accumulatedElapsedTime;
         	assertTrue("Thread index " + i + " kernel computation doesn't match the expected", validateBasic1Kernel(inputArray, results[i].outputArray));
-        	//FIXME Find a better way of determining kernel execution time
-        	//assertEquals("Runtime is not within 600ms of the kernel estimated", results[i].runTime, state.accumulatedElapsedTime, 600);
+        	if (!isJTP) {
+                //Here we allow a 10% error margin for machines under heavy load during the test, where latency is higher,
+                //and add a 250ms tolerance for fast machines, for which the execution time is of the same order as the Java latency
+            	assertEquals("Test estimated accumulated time doesn't match within a 10% time window", results[i].runTime, stateI.accumulatedElapsedTime, 0.1f * results[i].runTime + 250);
+        	}
     	}
 
     	assertEquals("Overall kernel execution time doesn't match",
-    			kernels.get(0).getAccumulatedExecutionTimeAllThreads(device), allThreadsAccumulatedTime, 1e10);
+    			kernels.get(0).getAccumulatedExecutionTimeAllThreads(device), allThreadsAccumulatedTime, 1e-10);
 
     	return true;
     }
diff --git a/src/test/java/com/aparapi/runtime/ProfileReportUnitTest.java b/src/test/java/com/aparapi/runtime/ProfileReportUnitTest.java
index f65411c82bb7edec156be0329092969eb73954fd..c6634b7ba1a8da54ab2fce857173f4e0c7ae17cf 100644
--- a/src/test/java/com/aparapi/runtime/ProfileReportUnitTest.java
+++ b/src/test/java/com/aparapi/runtime/ProfileReportUnitTest.java
@@ -81,7 +81,7 @@ public class ProfileReportUnitTest {
 		});
 		
 		//Ensure that the first thread as started profiling, before testing the others
-		kernelDeviceProfile.onEvent(ProfilingEvent.START);
+		kernelDeviceProfile.onEvent(ProfilingEvent.START, false);
 		
 		List<ProfilingEvent> events = Arrays.asList(ProfilingEvent.values());
 		
@@ -92,8 +92,8 @@ public class ProfileReportUnitTest {
 					final int idx = index.getAndIncrement();
 					executorService.submit(() -> {
 						threadIds[idx] = Thread.currentThread().getId();
-						kernelDeviceProfile.onEvent(ProfilingEvent.START);
-						kernelDeviceProfile.onEvent(ProfilingEvent.EXECUTED);
+						kernelDeviceProfile.onEvent(ProfilingEvent.START, false);
+						kernelDeviceProfile.onEvent(ProfilingEvent.EXECUTED, false);
 					});
 				});
 			} finally {
@@ -112,7 +112,7 @@ public class ProfileReportUnitTest {
 			assertEquals("Reports from all threads should have been received", javaThreads, receivedReports.get());
 			
 			//Only after this event should the main thread have received a report
-			kernelDeviceProfile.onEvent(ProfilingEvent.EXECUTED);
+			kernelDeviceProfile.onEvent(ProfilingEvent.EXECUTED, false);
 			
 			assertTrue("Report wasn't received for main thread", onEventAccepted.contains(threadIds[javaThreads]));
 			assertEquals("Reports from all threads should have been received", javaThreads + 1, receivedReports.get());
@@ -164,6 +164,10 @@ public class ProfileReportUnitTest {
 		report.setProfileReport(reportId + 1, valuesB);
 		
 		for (int i = 1; i < values.length; i++) {
+		    if (i == ProfilingEvent.READY_TO_PREPARE_EXECUTE.ordinal()) {
+		        //This one is expected to always be equal
+		        continue;
+		    }
 			assertNotEquals("Values match after new assingment for index " + i, report.getElapsedTime(i), clonedReport.getElapsedTime(i), 1e-10);
 		}
 		
diff --git a/src/test/java/com/aparapi/runtime/RangeSizeTest.java b/src/test/java/com/aparapi/runtime/RangeSizeTest.java
index 2b1db1705d01fda5dfd5a016fd081cc345226b36..a2411e9f9ce37b9e232ab6f29c02db5a8b35e312 100644
--- a/src/test/java/com/aparapi/runtime/RangeSizeTest.java
+++ b/src/test/java/com/aparapi/runtime/RangeSizeTest.java
@@ -15,23 +15,70 @@
  */
 package com.aparapi.runtime;
 
+import com.aparapi.Kernel;
 import com.aparapi.Range;
+import com.aparapi.device.Device;
+import com.aparapi.device.JavaDevice;
+import com.aparapi.internal.kernel.KernelManager;
+
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.List;
+
+import org.junit.After;
+
 public class RangeSizeTest {
 
     @Test
     public void test384x384() {
-        Range range = Range.create2D(384, 384);
+        Range range = Range.create2D((Kernel)null, 384, 384);
         assertTrue("Range > max work size", range.getLocalSize(0) * range.getLocalSize(1) <= range.getWorkGroupSize());
     }
 
     @Test
     public void test384x320() {
-        Range range = Range.create2D(384, 320);
+        Range range = Range.create2D((Kernel)null, 384, 320);
         assertTrue("Range > max work size", range.getLocalSize(0) * range.getLocalSize(1) <= range.getWorkGroupSize());
     }
 
+    private static class JTPKernelManager extends KernelManager { // static: needs no enclosing test instance; prefers the Java thread-pool device
+        private JTPKernelManager() {
+            LinkedHashSet<Device> preferredDevices = new LinkedHashSet<>(1);
+            preferredDevices.add(JavaDevice.THREAD_POOL);
+            setDefaultPreferredDevices(preferredDevices);
+        }
+        @Override
+        protected List<Device.TYPE> getPreferredDeviceTypes() {
+            return Arrays.asList(Device.TYPE.JTP);
+        }
+    }
+
+    @After // JUnit runs this after every test method, not once per class
+    public void classTeardown() {
+        Util.resetKernelManager();
+    }
+
+    @Test
+    public void testJTPRange() {
+        KernelManager.setKernelManager(new JTPKernelManager());
+        // 7 elements: an odd-sized 1D range, i.e. not a "nice" power-of-two size
+        int[] a = {1, 2, 3, 4, 5, 6, 7};
+        // each work item doubles the element at its own global id
+        new Kernel() {
+            @Override
+            public void run() {
+                int i = getGlobalId();
+                a[i] = a[i] * 2;
+            }
+        }.execute(a.length);
+        // every element must have been doubled exactly once
+        for (int i = 0; i < a.length; i++) {
+            assertEquals("Result doesn't match the expected at index: " + i, (i+1)*2, a[i]);
+        }
+    }
 }