From f5f39301ab3c55663dbe5ded456e23e165ad283d Mon Sep 17 00:00:00 2001
From: Barney Pitt <barneydpitt@gmail.com>
Date: Sun, 14 Sep 2014 09:49:05 +0000
Subject: [PATCH]

---
 .../src/java/com/amd/aparapi/Kernel.java      | 113 +++++++---
 .../exception/ClassParseException.java        |   3 +-
 .../internal/instruction/ExpressionList.java  |   1 -
 .../aparapi/internal/jni/KernelRunnerJNI.java |   4 +-
 .../aparapi/internal/kernel/KernelRunner.java |  24 ++-
 .../aparapi/internal/model/ClassModel.java    | 153 ++++++++++---
 .../aparapi/internal/model/Entrypoint.java    | 113 +++++-----
 .../aparapi/internal/model/MethodModel.java   | 202 ++++++++----------
 .../aparapi/internal/writer/BlockWriter.java  |  90 +++-----
 .../aparapi/internal/writer/KernelWriter.java | 116 +++++-----
 .../amd/aparapi/sample/median/MedianDemo.java |  63 ++++++
 .../sample/median/MedianKernel7x7.java        | 168 +++++++++++++++
 .../aparapi/sample/median/MedianSettings.java |  15 ++
 13 files changed, 710 insertions(+), 355 deletions(-)
 create mode 100644 samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
 create mode 100644 samples/median/src/com/amd/aparapi/sample/median/MedianKernel7x7.java
 create mode 100644 samples/median/src/com/amd/aparapi/sample/median/MedianSettings.java

diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 900b3440..0069d4e4 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -37,25 +37,18 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
 */
 package com.amd.aparapi;
 
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.BrokenBarrierException;
-import java.util.concurrent.CyclicBarrier;
-import java.util.logging.Logger;
-
-import com.amd.aparapi.annotation.Experimental;
-import com.amd.aparapi.exception.DeprecatedException;
-import com.amd.aparapi.internal.kernel.KernelRunner;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry;
-import com.amd.aparapi.internal.opencl.OpenCLLoader;
-import com.amd.aparapi.internal.util.UnsafeWrapper;
+import com.amd.aparapi.annotation.*;
+import com.amd.aparapi.exception.*;
+import com.amd.aparapi.internal.kernel.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.amd.aparapi.internal.opencl.*;
+import com.amd.aparapi.internal.util.*;
+
+import java.lang.annotation.*;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.logging.*;
 
 /**
  * A <i>kernel</i> encapsulates a data parallel algorithm that will execute either on a GPU
@@ -159,7 +152,7 @@ public abstract class Kernel implements Cloneable {
     *  int[] buffer_$local$ = new int[1024];
     *  </code></pre>
     *  
-    *  @see LOCAL_SUFFIX
+    *  @see #LOCAL_SUFFIX
     * 
     * 
     */
@@ -180,7 +173,7 @@ public abstract class Kernel implements Cloneable {
     *  int[] buffer_$constant$ = new int[1024];
     *  </code></pre>
     *  
-    *  @see LOCAL_SUFFIX
+    *  @see #LOCAL_SUFFIX
     * 
     * 
     */
@@ -189,6 +182,46 @@ public abstract class Kernel implements Cloneable {
 
    }
 
+   /**
+    *
+    *  We can use this Annotation to 'tag' __private (unshared) array fields. Data in the __private address space in OpenCL is accessible only from
+    *  the current kernel instance.
+    *
+    *  To mark a field as __private with a buffer size of 99, we can either annotate the buffer
+    *  <pre><code>
+    *  &#64;PrivateMemorySpace(99) int[] buffer = new int[99];
+    *  </code></pre>
+    *   Or use a special suffix
+    *  <pre><code>
+    *  int[] buffer_$private$99 = new int[99];
+    *  </code></pre>
+    *
+    *  <p>Note that any code which must be runnable in {@link EXECUTION_MODE#JTP} will fail to work correctly if it uses such an
+    *  array, as the array will be shared by all threads. The solution is to create a {@link NoCL} method called at the start of {@link #run()} which sets
+    *  the field to an array returned from a static <code>ThreadLocal&lt;foo[]&gt;</code>. Please see <code>MedianKernel7x7</code> in the samples for an example.</p>
+    *
+    *  @see #PRIVATE_SUFFIX
+    */
+   @Retention(RetentionPolicy.RUNTIME)
+   @Target({ElementType.FIELD})
+   public @interface PrivateMemorySpace {
+      /** Size of the array used as __private buffer. */
+      int value();
+   }
+
+    /**
+     * Annotation which can be applied to either a getter (with usual java bean naming convention relative to an instance field), or to any method
+     * with void return type, which prevents both the method body and any calls to the method being emitted in the generated OpenCL. (In the case of a getter, the
+     * underlying field is used in place of the NoCL getter method.) This allows for code specialization within a java/JTP execution path, for example to
+     * allow logging/breakpointing when debugging, or to apply ThreadLocal processing (see {@link PrivateMemorySpace}) in java to simulate OpenCL __private
+     * memory.
+     */
+   @Retention(RetentionPolicy.RUNTIME)
+   @Target({ElementType.METHOD, ElementType.FIELD})
+   public @interface NoCL {
+      // empty
+   }
+
    /**
     *  We can use this suffix to 'tag' intended local buffers. 
     *  
@@ -219,6 +252,22 @@ public abstract class Kernel implements Cloneable {
     */
    public final static String CONSTANT_SUFFIX = "_$constant$";
 
+   /**
+    *  We can use this suffix to 'tag' __private buffers.
+    *
+    *  <p>So either name the buffer
+    *  <pre><code>
+    *  int[] buffer_$private$32 = new int[32];
+    *  </code></pre>
+    *  Or use the Annotation form
+    *  <pre><code>
+    *  &#64;PrivateMemorySpace(32) int[] buffer = new int[32];
+    *  </code></pre>
+    *
+    *  @see PrivateMemorySpace for a more detailed usage summary
+    */
+   public final static String PRIVATE_SUFFIX = "_$private$";
+
    /**
     * This annotation is for internal use only
     */
@@ -448,8 +497,6 @@ public abstract class Kernel implements Cloneable {
 
       /**
        * Copy constructor
-       * 
-       * @param KernelState
        */
       protected KernelState(KernelState kernelState) {
          globalIds = kernelState.getGlobalIds();
@@ -1826,8 +1873,8 @@ public abstract class Kernel implements Cloneable {
     * 
     * @return The time spent executing the kernel (ms) 
     * 
-    * @see getConversionTime();
-    * @see getAccumulatedExectutionTime();
+    * @see #getConversionTime()
+    * @see #getAccumulatedExecutionTime()
     * 
     */
    public synchronized long getExecutionTime() {
@@ -1845,8 +1892,8 @@ public abstract class Kernel implements Cloneable {
     * 
     * @return The total time spent executing the kernel (ms) 
     * 
-    * @see getExecutionTime();
-    * @see getConversionTime();
+    * @see #getExecutionTime()
+    * @see #getConversionTime()
     * 
     */
    public synchronized long getAccumulatedExecutionTime() {
@@ -1861,8 +1908,8 @@ public abstract class Kernel implements Cloneable {
     * Determine the time taken to convert bytecode to OpenCL for first Kernel.execute(range) call.
     * @return The time spent preparing the kernel for execution using GPU
     * 
-    * @see getExecutionTime();
-    * @see getAccumulatedExectutionTime();
+    * @see #getExecutionTime()
+    * @see #getAccumulatedExecutionTime()
     */
    public synchronized long getConversionTime() {
       if (kernelRunner == null) {
@@ -1878,7 +1925,7 @@ public abstract class Kernel implements Cloneable {
     * When <code>kernel.execute(globalSize)</code> is invoked, Aparapi will schedule the execution of <code>globalSize</code> kernels. If the execution mode is GPU then 
     * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU. 
     * <p>
-    * @param range The number of Kernels that we would like to initiate.
+    * @param _range The number of Kernels that we would like to initiate.
     * @returnThe Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
     * 
     */
@@ -1907,7 +1954,6 @@ public abstract class Kernel implements Cloneable {
     * When <code>kernel.execute(_range, _passes)</code> is invoked, Aparapi will schedule the execution of <code>_reange</code> kernels. If the execution mode is GPU then 
     * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU. 
     * <p>
-    * @param _globalSize The number of Kernels that we would like to initiate.
     * @param _passes The number of passes to make
     * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
     * 
@@ -1937,8 +1983,7 @@ public abstract class Kernel implements Cloneable {
     * When <code>kernel.execute("entrypoint", globalSize)</code> is invoked, Aparapi will schedule the execution of <code>globalSize</code> kernels. If the execution mode is GPU then 
     * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU. 
     * <p>
-    * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
-    * @param _globalSize The number of Kernels that we would like to initiate.
+    * @param _entry is the name of the method we wish to use as the entrypoint to the kernel
     * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
     * 
     */
@@ -1957,7 +2002,6 @@ public abstract class Kernel implements Cloneable {
     * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU. 
     * <p>
     * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
-    * @param _globalSize The number of Kernels that we would like to initiate.
     * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
     * 
     */
@@ -1972,7 +2016,6 @@ public abstract class Kernel implements Cloneable {
     * the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU. 
     * <p>
     * @param _entrypoint is the name of the method we wish to use as the entrypoint to the kernel
-    * @param _globalSize The number of Kernels that we would like to initiate.
     * @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
     * 
     */
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/exception/ClassParseException.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/exception/ClassParseException.java
index e995b4ab..776dd967 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/exception/ClassParseException.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/exception/ClassParseException.java
@@ -83,7 +83,8 @@ import com.amd.aparapi.internal.instruction.Instruction;
       ACCESSEDOBJECTSETTERARRAY("Passing array arguments to Intrinsics in expression form is not supported"), //
       MULTIDIMENSIONARRAYASSIGN("Can't assign to two dimension array"), //
       MULTIDIMENSIONARRAYACCESS("Can't access through a two dimensional array"), //
-      MISSINGLOCALVARIABLETABLE("Method does not contain a local variable table (recompile with -g?)");
+      MISSINGLOCALVARIABLETABLE("Method does not contain a local variable table (recompile with -g?)"), //
+      IMPROPERPRIVATENAMEMANGLING("Could not parse private array size from field name");
 
       private String description;
 
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/instruction/ExpressionList.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/instruction/ExpressionList.java
index fb649f38..46e2e22e 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/instruction/ExpressionList.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/instruction/ExpressionList.java
@@ -796,7 +796,6 @@ public class ExpressionList{
             }
             if (startPc < Short.MAX_VALUE) {
                logger.fine("Scope block from " + startPc + " to  " + (tail.getThisPC() + tail.getLength()));
-               System.out.println("Scope block from " + startPc + " to  " + (tail.getThisPC() + tail.getLength()));
                for (Instruction i = head; i != null; i = i.getNextPC()) {
                   if (i.getThisPC() == startPc) {
                      final Instruction startInstruction = i.getRootExpr().getPrevExpr();
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index f8020cde..45203930 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -1,7 +1,5 @@
 package com.amd.aparapi.internal.jni;
 
-import java.util.List;
-
 import com.amd.aparapi.Kernel;
 import com.amd.aparapi.ProfileInfo;
 import com.amd.aparapi.Range;
@@ -10,6 +8,8 @@ import com.amd.aparapi.device.OpenCLDevice;
 import com.amd.aparapi.internal.annotation.DocMe;
 import com.amd.aparapi.internal.annotation.UsedByJNICode;
 
+import java.util.List;
+
 /**
  * This class is intended to be used as a 'proxy' or 'facade' object for Java code to interact with JNI
  */
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index 92654609..821364c8 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -114,7 +114,7 @@ public class KernelRunner extends KernelRunnerJNI{
    /**
     * <code>Kernel.dispose()</code> delegates to <code>KernelRunner.dispose()</code> which delegates to <code>disposeJNI()</code> to actually close JNI data structures.<br/>
     * 
-    * @see KernelRunnerJNI#disposeJNI()
+    * @see KernelRunnerJNI#disposeJNI(long)
     */
    public void dispose() {
       if (kernel.getExecutionMode().isOpenCL()) {
@@ -778,12 +778,24 @@ public class KernelRunner extends KernelRunnerJNI{
                   throw new IllegalStateException("Cannot send null refs to kernel, reverting to java");
                }
 
+               String fieldName = arg.getField().getName();
+               int arrayLength = Array.getLength(newArrayRef);
+               Integer privateMemorySize = ClassModel.getPrivateMemorySizeFromField(arg.getField());
+               if (privateMemorySize == null) {
+                  privateMemorySize = ClassModel.getPrivateMemorySizeFromFieldName(fieldName);
+               }
+               if (privateMemorySize != null) {
+                  if (arrayLength > privateMemorySize) {
+                     throw new IllegalStateException("__private array field " + fieldName + " has illegal length " + arrayLength + " > " + privateMemorySize);
+                  }
+               }
+
                if ((arg.getType() & ARG_OBJ_ARRAY_STRUCT) != 0) {
                   prepareOopConversionBuffer(arg);
                } else {
                   // set up JNI fields for normal arrays
                   arg.setJavaArray(newArrayRef);
-                  arg.setNumElements(Array.getLength(newArrayRef));
+                  arg.setNumElements(arrayLength);
                   arg.setSizeInBytes(arg.getNumElements() * arg.getPrimitiveSize());
 
                   if (((args[i].getType() & ARG_EXPLICIT) != 0) && puts.contains(newArrayRef)) {
@@ -1070,9 +1082,9 @@ public class KernelRunner extends KernelRunnerJNI{
 
                            if (type.getName().startsWith("[L")) {
                               args[i].setType(args[i].getType()
-                                    | (ARG_OBJ_ARRAY_STRUCT | 
-                                       ARG_WRITE | 
-                                       ARG_READ | 
+                                    | (ARG_OBJ_ARRAY_STRUCT |
+                                       ARG_WRITE |
+                                       ARG_READ |
                                        ARG_APARAPI_BUFFER));
 
                               if (logger.isLoggable(Level.FINE)) {
@@ -1288,7 +1300,6 @@ public class KernelRunner extends KernelRunnerJNI{
     *          It is assumed that this parameter is indeed an array (of int, float, short etc).
     * 
     * @see Kernel#get(int[] arr)
-    * @see Kernel#get(short[] arr)
     * @see Kernel#get(float[] arr)
     * @see Kernel#get(double[] arr)
     * @see Kernel#get(long[] arr)
@@ -1320,7 +1331,6 @@ public class KernelRunner extends KernelRunnerJNI{
     * @param array
     *          It is assumed that this parameter is indeed an array (of int, float, short etc).
     * @see Kernel#put(int[] arr)
-    * @see Kernel#put(short[] arr)
     * @see Kernel#put(float[] arr)
     * @see Kernel#put(double[] arr)
     * @see Kernel#put(long[] arr)
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
index b390f9d3..e1b269fa 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/ClassModel.java
@@ -37,25 +37,18 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
 */
 package com.amd.aparapi.internal.model;
 
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Stack;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import com.amd.aparapi.Config;
-import com.amd.aparapi.Kernel;
-import com.amd.aparapi.internal.annotation.DocMe;
-import com.amd.aparapi.internal.exception.AparapiException;
-import com.amd.aparapi.internal.exception.ClassParseException;
-import com.amd.aparapi.internal.instruction.InstructionSet.TypeSpec;
-import com.amd.aparapi.internal.model.ClassModel.AttributePool.CodeEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.FieldEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodEntry;
-import com.amd.aparapi.internal.reader.ByteReader;
+import com.amd.aparapi.*;
+import com.amd.aparapi.internal.annotation.*;
+import com.amd.aparapi.internal.exception.*;
+import com.amd.aparapi.internal.instruction.InstructionSet.*;
+import com.amd.aparapi.internal.model.ClassModel.AttributePool.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.amd.aparapi.internal.reader.*;
+
+import java.io.*;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.logging.*;
 
 /**
  * Class represents a ClassFile (MyClass.class).
@@ -129,6 +122,11 @@ public class ClassModel{
 
    private ClassModel superClazz = null;
 
+   private HashSet<String> noClMethods = null;
+
+   private HashMap<String, Kernel.PrivateMemorySpace> privateMemoryFields = null;
+
+
    /**
     * Create a ClassModel representing a given Class.
     * 
@@ -183,7 +181,7 @@ public class ClassModel{
    /**
     * Determine if this is the superclass of some other class.
     * 
-    * @param otherClass The class to compare against
+    * @param other The class to compare against
     * @return true if 'this' a superclass of another class   
     */
    public boolean isSuperClass(Class<?> other) {
@@ -258,6 +256,80 @@ public class ClassModel{
       return (returnName);
    }
 
+   /**
+    * Looks up the __private buffer size of a field: the {@link Kernel.PrivateMemorySpace} annotation value when the
+    * field carries one, otherwise the size mangled into the field name.
+    *
+    * @param fieldName name of the field to look up
+    * @return the private memory size, or null if the field does not satisfy the private memory conditions
+    * @throws ClassParseException if the name contains {@link Kernel#PRIVATE_SUFFIX} but no parsable size follows the last '$'
+    */
+   public Integer getPrivateMemorySize(String fieldName) throws ClassParseException {
+      if (privateMemoryFields == null) {
+         final HashMap<String, Kernel.PrivateMemorySpace> annotatedByName = new HashMap<String, Kernel.PrivateMemorySpace>();
+         final Class<?> modelled = getClassWeAreModelling();
+         // Public (possibly inherited) fields go in first, so a field declared on the modelled class wins a name clash.
+         for (Field[] group : new Field[][] {modelled.getFields(), modelled.getDeclaredFields()}) {
+            for (Field field : group) {
+               final Kernel.PrivateMemorySpace space = field.getAnnotation(Kernel.PrivateMemorySpace.class);
+               if (space != null) {
+                  annotatedByName.put(field.getName(), space);
+               }
+            }
+         }
+         privateMemoryFields = annotatedByName;
+      }
+      final Kernel.PrivateMemorySpace annotation = privateMemoryFields.get(fieldName);
+      if (annotation != null) {
+         return annotation.value();
+      }
+      return getPrivateMemorySizeFromFieldName(fieldName);
+   }
+
+   /** Returns the size declared by the field's PrivateMemorySpace annotation, or null when the field is not annotated. */
+   public static Integer getPrivateMemorySizeFromField(Field field) {
+      final Kernel.PrivateMemorySpace annotation = field.getAnnotation(Kernel.PrivateMemorySpace.class);
+      if (annotation == null) {
+         return null;
+      }
+      return annotation.value();
+   }
+
+   /** Parses the size written after the last '$' of a PRIVATE_SUFFIX-tagged field name; returns null for untagged names. */
+   public static Integer getPrivateMemorySizeFromFieldName(String fieldName) throws ClassParseException {
+      if (!fieldName.contains(Kernel.PRIVATE_SUFFIX)) {
+         return null;
+      }
+      final String sizeText = fieldName.substring(fieldName.lastIndexOf('$') + 1);
+      try {
+         return Integer.valueOf(sizeText);
+      } catch (NumberFormatException e) {
+         throw new ClassParseException(ClassParseException.TYPE.IMPROPERPRIVATENAMEMANGLING, fieldName);
+      }
+   }
+
+   /**
+    * Lazily collects the names of every method of the modelled class (declared, or public including inherited public
+    * methods) that carries the {@link Kernel.NoCL} annotation. The set is computed on first use and then cached.
+    */
+   public Set<String> getNoCLMethods() {
+      if (noClMethods != null) {
+         return noClMethods;
+      }
+      final HashSet<String> names = new HashSet<String>();
+      final Class<?> modelled = getClassWeAreModelling();
+      for (Method[] group : new Method[][] {modelled.getDeclaredMethods(), modelled.getMethods()}) {
+         for (Method method : group) {
+            if (method.getAnnotation(Kernel.NoCL.class) != null) {
+               // Only the simple name is recorded, so overloads of an annotated method share one entry.
+               names.add(method.getName());
+            }
+         }
+      }
+      noClMethods = names;
+      return noClMethods;
+   }
+
    public static String convert(String _string) {
       return (convert(_string, "", false));
    }
@@ -643,6 +715,10 @@ public class ClassModel{
          public int getSlot() {
             return (slot);
          }
+
+         public ClassModel getOwnerClassModel() {
+            return ClassModel.this;
+         }
       }
 
       public class ClassEntry extends Entry{
@@ -1155,6 +1231,18 @@ public class ClassModel{
          }
       }
 
+      /** Scans the pool for the first FieldEntry named _name; returns null when no such entry exists. */
+      FieldEntry getFieldEntry(String _name) {
+         for (Entry candidate : entries) {
+            // a non-field constant pool entry can never match, so it is reduced to null here
+            final FieldEntry asField = (candidate instanceof FieldEntry) ? (FieldEntry) candidate : null;
+            if ((asField != null) && _name.equals(asField.getNameAndTypeEntry().getNameUTF8Entry().getUTF8())) {
+               return asField;
+            }
+         }
+         return null;
+      }
+
       public FloatEntry getFloatEntry(int _index) {
          try {
             return ((FloatEntry) entries.get(_index));
@@ -1490,7 +1578,7 @@ public class ClassModel{
                exceptionPoolEntries.add(new ExceptionPoolEntry(_byteReader));
             }
 
-            codeEntryAttributePool = new AttributePool(_byteReader);
+            codeEntryAttributePool = new AttributePool(_byteReader, getName());
          }
 
          @Override public AttributePool getAttributePool() {
@@ -1969,7 +2057,8 @@ public class ClassModel{
                   public PrimitiveValue(int _tag, ByteReader _byteReader) {
                      super(_tag);
                      typeNameIndex = _byteReader.u2();
-                     constNameIndex = _byteReader.u2();
+                     //constNameIndex = _byteReader.u2();
+                     constNameIndex = 0;
                   }
 
                   public int getConstNameIndex() {
@@ -2125,13 +2214,17 @@ public class ClassModel{
 
       private final static String LOCALVARIABLETYPETABLE_TAG = "LocalVariableTypeTable";
 
-      public AttributePool(ByteReader _byteReader) {
+      public AttributePool(ByteReader _byteReader, String name) {
          final int attributeCount = _byteReader.u2();
          AttributePoolEntry entry = null;
          for (int i = 0; i < attributeCount; i++) {
             final int attributeNameIndex = _byteReader.u2();
             final int length = _byteReader.u4();
-            final String attributeName = constantPool.getUTF8Entry(attributeNameIndex).getUTF8();
+            UTF8Entry utf8Entry = constantPool.getUTF8Entry(attributeNameIndex);
+            if (utf8Entry == null) {
+               throw new IllegalStateException("corrupted state reading attributes for " + name);
+            }
+            final String attributeName = utf8Entry.getUTF8();
             if (attributeName.equals(LOCALVARIABLETABLE_TAG)) {
                localVariableTableEntry = new RealLocalVariableTableEntry(_byteReader, attributeNameIndex, length);
                entry = (RealLocalVariableTableEntry) localVariableTableEntry;
@@ -2182,8 +2275,8 @@ public class ClassModel{
             } else {
                logger.warning("Found unexpected Attribute (name = " + attributeName + ")");
                entry = new OtherEntry(_byteReader, attributeNameIndex, length);
-               attributePoolEntries.add(entry);
             }
+            attributePoolEntries.add(entry);
 
          }
       }
@@ -2248,7 +2341,7 @@ public class ClassModel{
          fieldAccessFlags = _byteReader.u2();
          nameIndex = _byteReader.u2();
          descriptorIndex = _byteReader.u2();
-         fieldAttributePool = new AttributePool(_byteReader);
+         fieldAttributePool = new AttributePool(_byteReader, getName());
       }
 
       public int getAccessFlags() {
@@ -2318,7 +2411,7 @@ public class ClassModel{
          methodAccessFlags = _byteReader.u2();
          nameIndex = _byteReader.u2();
          descriptorIndex = _byteReader.u2();
-         methodAttributePool = new AttributePool(_byteReader);
+         methodAttributePool = new AttributePool(_byteReader, getName());
          codeEntry = methodAttributePool.getCodeEntry();
       }
 
@@ -2397,6 +2490,10 @@ public class ClassModel{
       public String toString() {
          return getClassModel().getClassWeAreModelling().getName() + "." + getName() + " " + getDescriptor();
       }
+
+      public ClassModel getOwnerClassModel() {
+         return ClassModel.this;
+      }
    }
 
    public class ClassModelInterface{
@@ -2475,7 +2572,7 @@ public class ClassModel{
          methods.add(method);
       }
 
-      attributePool = new AttributePool(byteReader);
+      attributePool = new AttributePool(byteReader, getClassWeAreModelling().getSimpleName());
    }
 
    public int getMagic() {
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Entrypoint.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Entrypoint.java
index 425d5802..28aadab2 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Entrypoint.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/Entrypoint.java
@@ -37,46 +37,18 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
 */
 package com.amd.aparapi.internal.model;
 
-import java.lang.reflect.Field;
-import java.lang.reflect.Modifier;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import com.amd.aparapi.Config;
-import com.amd.aparapi.Kernel;
-import com.amd.aparapi.internal.exception.AparapiException;
-import com.amd.aparapi.internal.exception.ClassParseException;
-import com.amd.aparapi.internal.instruction.Instruction;
-import com.amd.aparapi.internal.instruction.InstructionSet;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToField;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ARRAYLENGTH;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_AALOAD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_GETFIELD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_INVOKESPECIAL;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_INVOKESTATIC;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_INVOKEVIRTUAL;
-import com.amd.aparapi.internal.instruction.InstructionSet.MethodCall;
-import com.amd.aparapi.internal.instruction.InstructionSet.TypeSpec;
-import com.amd.aparapi.internal.instruction.InstructionSet.VirtualMethodCall;
-import com.amd.aparapi.internal.model.ClassModel.ClassModelField;
-import com.amd.aparapi.internal.model.ClassModel.ClassModelMethod;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.FieldEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry.Arg;
-import com.amd.aparapi.internal.util.UnsafeWrapper;
+import com.amd.aparapi.*;
+import com.amd.aparapi.internal.exception.*;
+import com.amd.aparapi.internal.instruction.*;
+import com.amd.aparapi.internal.instruction.InstructionSet.*;
+import com.amd.aparapi.internal.model.ClassModel.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry.*;
+import com.amd.aparapi.internal.util.*;
+
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.logging.*;
 
 public class Entrypoint{
 
@@ -192,6 +164,9 @@ public class Entrypoint{
          if (type.isPrimitive() || type.isArray()) {
             return field;
          }
+         if (field.getAnnotation(Kernel.NoCL.class) != null) {
+            return null;
+         }
          if (logger.isLoggable(Level.FINE)) {
             logger.fine("field type is " + type.getName());
          }
@@ -489,7 +464,7 @@ public class Entrypoint{
       for (final MethodCall methodCall : methodModel.getMethodCalls()) {
 
          ClassModelMethod m = resolveCalledMethod(methodCall, classModel);
-         if ((m != null) && !methodMap.keySet().contains(m)) {
+         if ((m != null) && !methodMap.keySet().contains(m) && !noCL(m)) {
             final MethodModel target = new MethodModel(m, this);
             methodMap.put(m, target);
             methodModel.getCalledMethods().add(target);
@@ -505,7 +480,7 @@ public class Entrypoint{
             for (final MethodCall methodCall : mm.getMethodCalls()) {
 
                ClassModelMethod m = resolveCalledMethod(methodCall, classModel);
-               if (m != null) {
+               if (m != null && !noCL(m)) {
                   MethodModel target = null;
                   if (methodMap.keySet().contains(m)) {
                      // we remove and then add again.  Because this is a LinkedHashMap this 
@@ -678,30 +653,39 @@ public class Entrypoint{
 
                   }
 
-               } else if (instruction instanceof I_INVOKEVIRTUAL) {
+               }
+               else if (instruction instanceof I_INVOKEVIRTUAL) {
                   final I_INVOKEVIRTUAL invokeInstruction = (I_INVOKEVIRTUAL) instruction;
-                  final MethodEntry methodEntry = invokeInstruction.getConstantPoolMethodEntry();
-                  if (Kernel.isMappedMethod(methodEntry)) { //only do this for intrinsics
+                  MethodModel invokedMethod = invokeInstruction.getMethod();
+                  FieldEntry getterField = getSimpleGetterField(invokedMethod);
+                  if (getterField != null) {
+                     referencedFieldNames.add(getterField.getNameAndTypeEntry().getNameUTF8Entry().getUTF8());
+                  }
+                  else {
+                     final MethodEntry methodEntry = invokeInstruction.getConstantPoolMethodEntry();
+                     if (Kernel.isMappedMethod(methodEntry)) { //only do this for intrinsics
 
-                     if (Kernel.usesAtomic32(methodEntry)) {
-                        setRequiresAtomics32Pragma(true);
-                     }
+                        if (Kernel.usesAtomic32(methodEntry)) {
+                           setRequiresAtomics32Pragma(true);
+                        }
 
-                     final Arg methodArgs[] = methodEntry.getArgs();
-                     if ((methodArgs.length > 0) && methodArgs[0].isArray()) { //currently array arg can only take slot 0
-                        final Instruction arrInstruction = invokeInstruction.getArg(0);
-                        if (arrInstruction instanceof AccessField) {
-                           final AccessField access = (AccessField) arrInstruction;
-                           final FieldEntry field = access.getConstantPoolFieldEntry();
-                           final String accessedFieldName = field.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
-                           arrayFieldAssignments.add(accessedFieldName);
-                           referencedFieldNames.add(accessedFieldName);
-                        } else {
-                           throw new ClassParseException(ClassParseException.TYPE.ACCESSEDOBJECTSETTERARRAY);
+                        final Arg methodArgs[] = methodEntry.getArgs();
+                        if ((methodArgs.length > 0) && methodArgs[0].isArray()) { //currently array arg can only take slot 0
+                           final Instruction arrInstruction = invokeInstruction.getArg(0);
+                           if (arrInstruction instanceof AccessField) {
+                              final AccessField access = (AccessField) arrInstruction;
+                              final FieldEntry field = access.getConstantPoolFieldEntry();
+                              final String accessedFieldName = field.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+                              arrayFieldAssignments.add(accessedFieldName);
+                              referencedFieldNames.add(accessedFieldName);
+                           }
+                           else {
+                              throw new ClassParseException(ClassParseException.TYPE.ACCESSEDOBJECTSETTERARRAY);
+                           }
                         }
                      }
-                  }
 
+                  }
                }
             }
          }
@@ -814,6 +798,15 @@ public class Entrypoint{
       }
    }
 
+   /** True when the name of {@code m} is contained in the NoCL method collection of m's class model. */
+   private boolean noCL(ClassModelMethod m) {
+      return m.getClassModel().getNoCLMethods().contains(m.getName());
+   }
+
+   private FieldEntry getSimpleGetterField(MethodModel method) { // thin delegate to the accessor field entry held by the given model
+      return method.getAccessorVariableFieldEntry(); // result may be null; the INVOKEVIRTUAL scan in this class null-checks it before use
+   }
+
    public boolean shouldFallback() {
       return (fallback);
    }
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/MethodModel.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/MethodModel.java
index 7bda9e2e..c6d7fc9b 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/MethodModel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/model/MethodModel.java
@@ -37,79 +37,19 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
  */
 package com.amd.aparapi.internal.model;
 
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.LinkedHashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
+import com.amd.aparapi.*;
+import com.amd.aparapi.internal.exception.*;
+import com.amd.aparapi.internal.instruction.*;
+import com.amd.aparapi.internal.instruction.InstructionPattern.*;
+import com.amd.aparapi.internal.instruction.InstructionSet.*;
+import com.amd.aparapi.internal.model.ClassModel.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry.*;
+import com.amd.aparapi.internal.reader.*;
+
+import java.util.*;
 import java.util.Map.Entry;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import com.amd.aparapi.Config;
-import com.amd.aparapi.Kernel;
-import com.amd.aparapi.internal.exception.AparapiException;
-import com.amd.aparapi.internal.exception.ClassParseException;
-import com.amd.aparapi.internal.instruction.ExpressionList;
-import com.amd.aparapi.internal.instruction.Instruction;
-import com.amd.aparapi.internal.instruction.InstructionPattern;
-import com.amd.aparapi.internal.instruction.InstructionSet;
-import com.amd.aparapi.internal.instruction.InstructionTransformer;
-import com.amd.aparapi.internal.instruction.InstructionPattern.InstructionMatch;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessInstanceField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessLocalVariable;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToInstanceField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToLocalVariable;
-import com.amd.aparapi.internal.instruction.InstructionSet.Branch;
-import com.amd.aparapi.internal.instruction.InstructionSet.CastOperator;
-import com.amd.aparapi.internal.instruction.InstructionSet.CloneInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.DUP;
-import com.amd.aparapi.internal.instruction.InstructionSet.FieldArrayElementAssign;
-import com.amd.aparapi.internal.instruction.InstructionSet.FieldArrayElementIncrement;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_AASTORE;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ARETURN;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ATHROW;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_BASTORE;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_CASTORE;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_DUP;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_DUP2;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_DUP_X1;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_DUP_X2;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_GETSTATIC;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IADD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ICONST_1;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IINC;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_INVOKEINTERFACE;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_LOOKUPSWITCH;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_MONITORENTER;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_MONITOREXIT;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_PUTFIELD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_PUTSTATIC;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_TABLESWITCH;
-import com.amd.aparapi.internal.instruction.InstructionSet.IncrementInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.InlineAssignInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.MethodCall;
-import com.amd.aparapi.internal.instruction.InstructionSet.MultiAssignInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.New;
-import com.amd.aparapi.internal.instruction.InstructionSet.Return;
-import com.amd.aparapi.internal.instruction.InstructionSet.StoreSpec;
-import com.amd.aparapi.internal.model.ClassModel.ClassModelMethod;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool;
-import com.amd.aparapi.internal.model.ClassModel.LocalVariableInfo;
-import com.amd.aparapi.internal.model.ClassModel.LocalVariableTableEntry;
-import com.amd.aparapi.internal.model.ClassModel.MethodDescription;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.FieldEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodReferenceEntry.Arg;
-import com.amd.aparapi.internal.reader.ByteReader;
+import java.util.logging.*;
 
 public class MethodModel{
 
@@ -133,11 +73,15 @@ public class MethodModel{
 
    private boolean methodIsSetter;
 
+   private boolean methodIsPrivateMemoryGetter = false;
+
    // Only setters can use putfield
    private boolean usesPutfield;
 
    private FieldEntry accessorVariableFieldEntry;
 
+   private boolean noCL = false;
+
    public boolean isGetter() {
       return methodIsGetter;
    }
@@ -150,6 +94,15 @@ public class MethodModel{
       return usesPutfield;
    }
 
+   public boolean isNoCL() { // exposes the noCL field, which is declared with an initial value of false
+      return noCL; // set to true elsewhere in this class when the owner class model's getNoCLMethods() contains the method name
+   }
+
+
+   public boolean isPrivateMemoryGetter() { // exposes methodIsPrivateMemoryGetter, which is declared with an initial value of false
+      return methodIsPrivateMemoryGetter; // checkForGetter sets it when getPrivateMemorySize(fieldName) on the class model is non-null
+   }
+
    public ClassModelMethod getMethod() {
       return method;
    }
@@ -339,10 +292,6 @@ public class MethodModel{
     * have the branch node (at 100) added to it's forwardUnconditional list.
     * 
     * @see InstructionSet.Branch#getTarget()
-    * @see Instruction#getForwardConditionalTargets()
-    * @see Instruction#getForwardUnconditionalTargets()
-    * @see Instruction#getReverseConditionalTargets()
-    * @see Instruction#getReverseUnconditionalTargets()
     */
    public void buildBranchGraphs(Map<Integer, Instruction> pcMap) {
       for (Instruction instruction = pcHead; instruction != null; instruction = instruction.getNextPC()) {
@@ -1344,60 +1293,75 @@ public class MethodModel{
    void checkForGetter(Map<Integer, Instruction> pcMap) throws ClassParseException {
       final String methodName = getMethod().getName();
       String rawVarNameCandidate = null;
-      boolean mightBeSetter = true;
+      boolean mightBeGetter = true;
 
       if (methodName.startsWith("get")) {
          rawVarNameCandidate = methodName.substring(3);
       } else if (methodName.startsWith("is")) {
          rawVarNameCandidate = methodName.substring(2);
       } else {
-         mightBeSetter = false;
+         mightBeGetter = false;
       }
 
       // Getters should have 3 bcs: aload_0, getfield, ?return
-      if (mightBeSetter) {
-         if ((rawVarNameCandidate != null) && (pcMap.size() == 3)) {
+      if (mightBeGetter) {
+         boolean possiblySimpleGetImplementation = pcMap.size() == 3;
+         if ((rawVarNameCandidate != null) && (isNoCL() || possiblySimpleGetImplementation)) {
             final String firstLetter = rawVarNameCandidate.substring(0, 1).toLowerCase();
-            final String varNameCandidateCamelCased = rawVarNameCandidate.replaceFirst(rawVarNameCandidate.substring(0, 1),
-                  firstLetter);
-            String accessedFieldName = null;
-            Instruction instruction = expressionList.getHead();
-
-            if ((instruction instanceof Return) && (expressionList.getHead() == expressionList.getTail())) {
-               instruction = instruction.getPrevPC();
-               if (instruction instanceof AccessInstanceField) {
-                  final FieldEntry field = ((AccessInstanceField) instruction).getConstantPoolFieldEntry();
-                  accessedFieldName = field.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
-                  if (accessedFieldName.equals(varNameCandidateCamelCased)) {
-
-                     // Verify field type matches return type
-                     final String fieldType = field.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8();
-                     final String returnType = getMethod().getDescriptor().substring(2);
-                     //System.out.println( "### field type = " + fieldType );
-                     //System.out.println( "### method args = " + returnType );
-                     assert (fieldType.length() == 1) && (returnType.length() == 1) : " can only use basic type getters";
-
-                     // Allow isFoo style for boolean fields
-                     if ((methodName.startsWith("is") && fieldType.equals("Z")) || (methodName.startsWith("get"))) {
-                        if (fieldType.equals(returnType)) {
-                           if (logger.isLoggable(Level.FINE)) {
-                              logger.fine("Found " + methodName + " as a getter for " + varNameCandidateCamelCased.toLowerCase());
-                           }
+            final String varNameCandidateCamelCased = rawVarNameCandidate.replaceFirst(rawVarNameCandidate.substring(0, 1), firstLetter);
+            String accessedFieldName;
+
+            if (!isNoCL()) {
+
+               Instruction instruction = expressionList.getHead();
+
+               if ((instruction instanceof Return) && (expressionList.getHead() == expressionList.getTail())) {
+                  instruction = instruction.getPrevPC();
+                  if (instruction instanceof AccessInstanceField) {
+                     final FieldEntry field = ((AccessInstanceField) instruction).getConstantPoolFieldEntry();
+                     accessedFieldName = field.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+                     if (accessedFieldName.equals(varNameCandidateCamelCased)) {
+
+                        // Verify field type matches return type
+                        final String fieldType = field.getNameAndTypeEntry().getDescriptorUTF8Entry().getUTF8();
+                        final String returnType = getMethod().getDescriptor().substring(2);
+                        //System.out.println( "### field type = " + fieldType );
+                        //System.out.println( "### method args = " + returnType );
+                        assert (fieldType.length() == 1) && (returnType.length() == 1) : " can only use basic type getters";
+
+                        // Allow isFoo style for boolean fields
+                        if ((methodName.startsWith("is") && fieldType.equals("Z")) || (methodName.startsWith("get"))) {
+                           if (fieldType.equals(returnType)) {
+                              if (logger.isLoggable(Level.FINE)) {
+                                 logger.fine("Found " + methodName + " as a getter for " + varNameCandidateCamelCased.toLowerCase());
+                              }
 
-                           methodIsGetter = true;
-                           accessorVariableFieldEntry = field;
-                           assert methodIsSetter == false : " cannot be both";
-                        } else {
-                           throw new ClassParseException(ClassParseException.TYPE.BADGETTERTYPEMISMATCH, methodName);
+                              methodIsGetter = true;
+                              setAccessorVariableFieldEntry(field);
+                              assert methodIsSetter == false : " cannot be both";
+                           } else {
+                              throw new ClassParseException(ClassParseException.TYPE.BADGETTERTYPEMISMATCH, methodName);
 
+                           }
                         }
+                     } else {
+                        throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMEMISMATCH, methodName);
                      }
-                  } else {
-                     throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMEMISMATCH, methodName);
                   }
+               } else {
+                  throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMENOTFOUND, methodName);
                }
             } else {
-               throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMENOTFOUND, methodName);
+               FieldEntry fieldEntry = getMethod().getOwnerClassModel().getConstantPool().getFieldEntry(varNameCandidateCamelCased);
+               setAccessorVariableFieldEntry(fieldEntry);
+               if (getAccessorVariableFieldEntry() == null) {
+                  throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMEMISMATCH, methodName);
+               }
+               methodIsGetter = true;
+               if (method.getClassModel().getPrivateMemorySize(fieldEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8()) != null)
+               {
+                  methodIsPrivateMemoryGetter = true;
+               }
             }
          } else {
             throw new ClassParseException(ClassParseException.TYPE.BADGETTERNAMENOTFOUND, methodName);
@@ -1405,6 +1369,10 @@ public class MethodModel{
       }
    }
 
+   private void setAccessorVariableFieldEntry(FieldEntry field) { // called by the getter/setter detection code instead of assigning the field directly
+      accessorVariableFieldEntry = field; // stored as-is, no null check here; checkForGetter tests for null after calling this
+   }
+
    /**
     * Determine if this method is a setter and record the accessed field if so
     */
@@ -1441,7 +1409,7 @@ public class MethodModel{
                      }
 
                      methodIsSetter = true;
-                     accessorVariableFieldEntry = field;
+                     setAccessorVariableFieldEntry(field);
 
                      // Setters use putfield which will miss the normal store check
                      if (fieldType.equals("B") || fieldType.equals("Z")) {
@@ -1638,6 +1606,10 @@ public class MethodModel{
       try {
          method = _method;
          expressionList = new ExpressionList(this);
+         ClassModel owner = _method.getOwnerClassModel();
+         if (owner.getNoCLMethods().contains(method.getName())) {
+             noCL = true;
+         }
 
          // check if we have any exception handlers
          final int exceptionsSize = method.getCodeEntry().getExceptionPoolEntries().size();
@@ -1676,7 +1648,7 @@ public class MethodModel{
          foldExpressions();
 
          // Accessor conversion only works on member object arrays
-         if ((entrypoint != null) && (_method.getClassModel() != entrypoint.getClassModel())) {
+         if (isNoCL() || (entrypoint != null) && (_method.getClassModel() != entrypoint.getClassModel())) {
             if (logger.isLoggable(Level.FINE)) {
                logger.fine("Considering accessor call: " + getName());
             }
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/BlockWriter.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/BlockWriter.java
index 7cf912fa..9e9efc63 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/BlockWriter.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/BlockWriter.java
@@ -37,61 +37,16 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
 */
 package com.amd.aparapi.internal.writer;
 
-import java.util.Stack;
-
-import com.amd.aparapi.Config;
-import com.amd.aparapi.internal.exception.CodeGenException;
-import com.amd.aparapi.internal.instruction.BranchSet;
-import com.amd.aparapi.internal.instruction.Instruction;
-import com.amd.aparapi.internal.instruction.BranchSet.CompoundLogicalExpressionNode;
-import com.amd.aparapi.internal.instruction.BranchSet.LogicalExpressionNode;
-import com.amd.aparapi.internal.instruction.BranchSet.SimpleLogicalExpressionNode;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessInstanceField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessLocalVariable;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToInstanceField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToLocalVariable;
-import com.amd.aparapi.internal.instruction.InstructionSet.BinaryOperator;
-import com.amd.aparapi.internal.instruction.InstructionSet.Branch;
-import com.amd.aparapi.internal.instruction.InstructionSet.ByteCode;
-import com.amd.aparapi.internal.instruction.InstructionSet.CastOperator;
-import com.amd.aparapi.internal.instruction.InstructionSet.CloneInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeArbitraryScopeInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeDoWhileInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeEmptyLoopInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeForEclipseInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeForSunInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeIfElseInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeIfInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.CompositeWhileInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.ConditionalBranch16;
-import com.amd.aparapi.internal.instruction.InstructionSet.Constant;
-import com.amd.aparapi.internal.instruction.InstructionSet.FieldArrayElementAssign;
-import com.amd.aparapi.internal.instruction.InstructionSet.FieldArrayElementIncrement;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ALOAD_0;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_AALOAD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ARRAYLENGTH;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IFNONNULL;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IFNULL;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IINC;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_POP;
-import com.amd.aparapi.internal.instruction.InstructionSet.If;
-import com.amd.aparapi.internal.instruction.InstructionSet.IfUnary;
-import com.amd.aparapi.internal.instruction.InstructionSet.IncrementInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.InlineAssignInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.MethodCall;
-import com.amd.aparapi.internal.instruction.InstructionSet.MultiAssignInstruction;
-import com.amd.aparapi.internal.instruction.InstructionSet.Return;
-import com.amd.aparapi.internal.instruction.InstructionSet.UnaryOperator;
-import com.amd.aparapi.internal.instruction.InstructionSet.VirtualMethodCall;
-import com.amd.aparapi.internal.model.Entrypoint;
-import com.amd.aparapi.internal.model.MethodModel;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodEntry;
-import com.amd.aparapi.internal.model.ClassModel.LocalVariableInfo;
+import com.amd.aparapi.*;
+import com.amd.aparapi.internal.exception.*;
+import com.amd.aparapi.internal.instruction.*;
+import com.amd.aparapi.internal.instruction.BranchSet.*;
+import com.amd.aparapi.internal.instruction.InstructionSet.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
+import com.amd.aparapi.internal.model.ClassModel.*;
+import com.amd.aparapi.internal.model.*;
+
+import java.util.*;
 
 /**
  * Base abstract class for converting <code>Aparapi</code> IR to text.<br/>
@@ -330,6 +285,19 @@ public abstract class BlockWriter{
 
    }
 
+   protected void writeGetterBlock(FieldEntry accessorVariableFieldEntry) { // emits a body of the form "{ return this-><field name>; }"
+      write("{");
+      in(); // presumably raises the indent level for the enclosed statement — confirm against in()/out()
+      newLine();
+      write("return this->");
+      write(accessorVariableFieldEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8()); // UTF8 name taken from the field's name-and-type entry
+      write(";");
+      out();
+      newLine();
+      // the closing brace is written on its own line, after out() has been called
+      write("}");
+   }
+
    public void writeBlock(Instruction _first, Instruction _last) throws CodeGenException {
       write("{");
       in();
@@ -756,6 +724,10 @@ public abstract class BlockWriter{
    }
 
    public void writeMethod(MethodCall _methodCall, MethodEntry _methodEntry) throws CodeGenException {
+      boolean noCL = _methodEntry.getOwnerClassModel().getNoCLMethods().contains(_methodEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8());
+      if (noCL) {
+          return;
+      }
 
       if (_methodCall instanceof VirtualMethodCall) {
          final Instruction instanceInstruction = ((VirtualMethodCall) _methodCall).getInstanceReference();
@@ -785,7 +757,13 @@ public abstract class BlockWriter{
    }
 
    public void writeMethodBody(MethodModel _methodModel) throws CodeGenException {
-      writeBlock(_methodModel.getExprHead(), null);
+      if (_methodModel.isGetter() && !_methodModel.isNoCL()) {
+         FieldEntry accessorVariableFieldEntry = _methodModel.getAccessorVariableFieldEntry();
+         writeGetterBlock(accessorVariableFieldEntry);
+      }
+      else {
+         writeBlock(_methodModel.getExprHead(), null);
+      }
    }
 
    public abstract void write(Entrypoint entryPoint) throws CodeGenException;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/KernelWriter.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/KernelWriter.java
index 04119e92..23d5c158 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/KernelWriter.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/writer/KernelWriter.java
@@ -37,42 +37,20 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit
  */
 package com.amd.aparapi.internal.writer;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import com.amd.aparapi.Config;
+import com.amd.aparapi.*;
 import com.amd.aparapi.Kernel;
-import com.amd.aparapi.internal.exception.CodeGenException;
-import com.amd.aparapi.internal.instruction.Instruction;
-import com.amd.aparapi.internal.instruction.InstructionSet;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AccessField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToArrayElement;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToField;
-import com.amd.aparapi.internal.instruction.InstructionSet.AssignToLocalVariable;
-import com.amd.aparapi.internal.instruction.InstructionSet.BinaryOperator;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_ALOAD_0;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_GETFIELD;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_INVOKESPECIAL;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_IUSHR;
-import com.amd.aparapi.internal.instruction.InstructionSet.I_LUSHR;
-import com.amd.aparapi.internal.instruction.InstructionSet.MethodCall;
-import com.amd.aparapi.internal.instruction.InstructionSet.VirtualMethodCall;
-import com.amd.aparapi.internal.model.ClassModel;
-import com.amd.aparapi.internal.model.ClassModel.ClassModelField;
-import com.amd.aparapi.internal.model.ClassModel.LocalVariableInfo;
-import com.amd.aparapi.internal.model.ClassModel.LocalVariableTableEntry;
-import com.amd.aparapi.internal.model.ClassModel.AttributePool.RuntimeAnnotationsEntry;
-import com.amd.aparapi.internal.model.ClassModel.AttributePool.RuntimeAnnotationsEntry.AnnotationInfo;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.FieldEntry;
-import com.amd.aparapi.internal.model.ClassModel.ConstantPool.MethodEntry;
-import com.amd.aparapi.internal.model.Entrypoint;
-import com.amd.aparapi.internal.model.MethodModel;
+import com.amd.aparapi.internal.exception.*;
+import com.amd.aparapi.internal.instruction.*;
+import com.amd.aparapi.internal.instruction.InstructionSet.*;
+import com.amd.aparapi.internal.model.*;
+import com.amd.aparapi.internal.model.ClassModel.AttributePool.*;
+import com.amd.aparapi.internal.model.ClassModel.AttributePool.RuntimeAnnotationsEntry.*;
+import com.amd.aparapi.internal.model.ClassModel.*;
+import com.amd.aparapi.internal.model.ClassModel.ConstantPool.*;
 import com.amd.aparapi.opencl.OpenCL.Constant;
-import com.amd.aparapi.opencl.OpenCL.Local;
+import com.amd.aparapi.opencl.OpenCL.*;
+
+import java.util.*;
 
 public abstract class KernelWriter extends BlockWriter{
 
@@ -189,10 +167,6 @@ public abstract class KernelWriter extends BlockWriter{
    }
 
    @Override public void writeMethod(MethodCall _methodCall, MethodEntry _methodEntry) throws CodeGenException {
-
-      // System.out.println("_methodEntry = " + _methodEntry);
-      // special case for buffers
-
       final int argc = _methodEntry.getStackConsumeCount();
 
       final String methodName = _methodEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
@@ -217,16 +191,30 @@ public abstract class KernelWriter extends BlockWriter{
             write(barrierAndGetterMappings);
          }
       } else {
+         final boolean isSpecial = _methodCall instanceof I_INVOKESPECIAL;
+         MethodModel m = entryPoint.getCallTarget(_methodEntry, isSpecial);
 
+         FieldEntry getterField = null;
+         if (m != null && m.isGetter()) {
+            getterField = m.getAccessorVariableFieldEntry();
+         }
+         if (getterField != null) {
+             String fieldName = getterField.getNameAndTypeEntry().getNameUTF8Entry().getUTF8();
+             write("this->");
+             write(fieldName);
+             return;
+         }
+         boolean noCL = _methodEntry.getOwnerClassModel().getNoCLMethods().contains(_methodEntry.getNameAndTypeEntry().getNameUTF8Entry().getUTF8());
+         if (noCL) {
+             return;
+         }
          final String intrinsicMapping = Kernel.getMappedMethodName(_methodEntry);
          // System.out.println("getMappedMethodName for " + methodName + " returned " + mapping);
          boolean isIntrinsic = false;
 
          if (intrinsicMapping == null) {
             assert entryPoint != null : "entryPoint should not be null";
-            final boolean isSpecial = _methodCall instanceof I_INVOKESPECIAL;
             boolean isMapped = Kernel.isMappedMethod(_methodEntry);
-            final MethodModel m = entryPoint.getCallTarget(_methodEntry, isSpecial);
 
             if (m != null) {
                write(m.getName());
@@ -283,6 +271,8 @@ public abstract class KernelWriter extends BlockWriter{
 
    public final static String __constant = "__constant";
 
+   public final static String __private = "__private";
+
    public final static String LOCAL_ANNOTATION_NAME = "L" + Local.class.getName().replace(".", "/") + ";";
 
    public final static String CONSTANT_ANNOTATION_NAME = "L" + Constant.class.getName().replace(".", "/") + ";";
@@ -306,9 +296,20 @@ public abstract class KernelWriter extends BlockWriter{
 
          int numDimensions = 0;
 
-         // check the suffix 
+         // check the suffix
+
          String type = field.getName().endsWith(Kernel.LOCAL_SUFFIX) ? __local
                : (field.getName().endsWith(Kernel.CONSTANT_SUFFIX) ? __constant : __global);
+         Integer privateMemorySize = null;
+         try {
+            privateMemorySize = _entryPoint.getClassModel().getPrivateMemorySize(field.getName());
+         } catch (ClassParseException e) {
+            throw new CodeGenException(e);
+         }
+
+         if (privateMemorySize != null) {
+             type = __private;
+         }
          final RuntimeAnnotationsEntry visibleAnnotations = field.getAttributePool().getRuntimeVisibleAnnotationsEntry();
 
          if (visibleAnnotations != null) {
@@ -322,11 +323,13 @@ public abstract class KernelWriter extends BlockWriter{
             }
          }
 
+         String argType = (__private.equals(type)) ? __constant : type;
+
          //if we have a an array we want to mark the object as a pointer
          //if we have a multiple dimensional array we want to remember the number of dimensions
          while (signature.startsWith("[")) {
             if(isPointer == false) {
-               argLine.append(type + " ");
+               argLine.append(argType + " ");
                thisStructLine.append(type + " ");
             }
             isPointer = true;
@@ -342,7 +345,6 @@ public abstract class KernelWriter extends BlockWriter{
             // if (logger.isLoggable(Level.FINE)) {
             // logger.fine("Examining object parameter: " + signature + " new: " + className);
             // }
-
             argLine.append(className);
             thisStructLine.append(className);
          } else {
@@ -355,16 +357,27 @@ public abstract class KernelWriter extends BlockWriter{
 
          if (isPointer) {
             argLine.append("*");
-            thisStructLine.append("*");
+            if (privateMemorySize == null) {
+               thisStructLine.append("*");
+            }
+         }
+
+         if (privateMemorySize == null) {
+            assignLine.append("this->");
+            assignLine.append(field.getName());
+            assignLine.append(" = ");
+            assignLine.append(field.getName());
          }
-         assignLine.append("this->");
-         assignLine.append(field.getName());
-         assignLine.append(" = ");
-         assignLine.append(field.getName());
+
          argLine.append(field.getName());
          thisStructLine.append(field.getName());
-         assigns.add(assignLine.toString());
+         if (privateMemorySize == null) {
+            assigns.add(assignLine.toString());
+         }
          argLines.add(argLine.toString());
+         if (privateMemorySize != null) {
+            thisStructLine.append("[").append(privateMemorySize).append("]");
+         }
          thisStruct.add(thisStructLine.toString());
 
          // Add int field into "this" struct for supporting java arraylength op
@@ -538,9 +551,12 @@ public abstract class KernelWriter extends BlockWriter{
 
       for (final MethodModel mm : _entryPoint.getCalledMethods()) {
          // write declaration :)
+         if (mm.isPrivateMemoryGetter()) {
+            continue;
+         }
 
          final String returnType = mm.getReturnType();
-         // Arrays always map to __global arrays
+         // Arrays always map to __private or __global arrays
          if (returnType.startsWith("[")) {
             write(" __global ");
          }
diff --git a/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java b/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
new file mode 100644
index 00000000..2e938d75
--- /dev/null
+++ b/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java
@@ -0,0 +1,63 @@
+package com.amd.aparapi.sample.median;
+
+import com.amd.aparapi.Kernel;
+
+import javax.imageio.*;
+import javax.swing.*;
+import java.awt.*;
+import java.awt.image.*;
+import java.io.*;
+
+/**
+ * Demonstrates use of the __private address space and the @NoCL annotation.
+ */
+public class MedianDemo {
+   public final static BufferedImage testImage;
+   private static final String DEFAULT_IMAGE_PATH = "C:\\dev\\aparapi_live\\aparapi\\samples\\convolution\\testcard.jpg"; // used when the property below is unset
+   static {
+      try {
+         testImage = ImageIO.read(new File(System.getProperty("com.amd.aparapi.sample.median.testImage", DEFAULT_IMAGE_PATH))); // override with -Dcom.amd.aparapi.sample.median.testImage=<path>
+      } catch (IOException e) {
+         throw new RuntimeException(e);
+      }
+   }
+
+   private static final boolean TEST_JTP = false; // set to true to force the JTP execution mode in main
+
+   public static void main(String[] ignored) {
+      System.setProperty("com.amd.aparapi.enableShowGeneratedOpenCL", "true");
+      int[] argbs = testImage.getRGB(0, 0, testImage.getWidth(), testImage.getHeight(), null, 0, testImage.getWidth()); // whole image as packed ints, row after row
+      MedianKernel7x7 kernel = new MedianKernel7x7();
+      kernel._imageTypeOrdinal = MedianKernel7x7.RGB;
+      kernel._sourceWidth = testImage.getWidth();
+      kernel._sourceHeight = testImage.getHeight();
+      kernel._sourcePixels = argbs;
+      kernel._destPixels = new int[argbs.length];
+      if (TEST_JTP) {
+         kernel.setExecutionMode(Kernel.EXECUTION_MODE.JTP);
+      }
+      kernel.processImages(new MedianSettings(7));
+      BufferedImage out = new BufferedImage(testImage.getWidth(), testImage.getHeight(), BufferedImage.TYPE_INT_RGB);
+      out.setRGB(0, 0, testImage.getWidth(), testImage.getHeight(), kernel._destPixels, 0, testImage.getWidth());
+      ImageIcon icon1 = new ImageIcon(testImage); // left: original image
+      JLabel label1 = new JLabel(icon1);
+      ImageIcon icon2 = new ImageIcon(out); // right: median-filtered result
+      JLabel label2 = new JLabel(icon2);
+      JFrame frame = new JFrame("Test Median");
+      frame.setLayout(new FlowLayout());
+      frame.getContentPane().add(label1);
+      frame.getContentPane().add(label2);
+      frame.pack();
+      frame.setLocationRelativeTo(null);
+      frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+      frame.setVisible(true);
+
+      int reps = 20; // repeat the same filter and print the wall-clock time of each run
+      for (int rep = 0; rep < reps; ++rep) {
+         long start = System.nanoTime();
+         kernel.processImages(new MedianSettings(7));
+         long elapsed = System.nanoTime() - start;
+         System.out.println("elapsed = " + elapsed / 1000000f + "ms");
+      }
+   }
+}
diff --git a/samples/median/src/com/amd/aparapi/sample/median/MedianKernel7x7.java b/samples/median/src/com/amd/aparapi/sample/median/MedianKernel7x7.java
new file mode 100644
index 00000000..6cbece41
--- /dev/null
+++ b/samples/median/src/com/amd/aparapi/sample/median/MedianKernel7x7.java
@@ -0,0 +1,168 @@
+package com.amd.aparapi.sample.median;
+
+import com.amd.aparapi.*;
+
+/**
+ * Provides support for pixel windows of size no greater than 49 (e.g. 7x7).
+ * <p/>
+ * <p>Demonstrates use of a __private array for (partial) sorting, and of the @NoCL annotation for specialised use of ThreadLocal in JTP execution.
+ */
+public class MedianKernel7x7 extends Kernel {
+   public static final int CHANNEL_GRAY = -1; // pixel value is used unchanged (see valueForChannel)
+   public static final int CHANNEL_ALPHA = 0; // bits 24-31 of a packed ARGB int
+   public static final int CHANNEL_RED = 1; // bits 16-23
+   public static final int CHANNEL_GREEN = 2; // bits 8-15
+   public static final int CHANNEL_BLUE = 3; // bits 0-7
+
+   protected static final int MONOCHROME = 0; // MONOCHROME / RGB / ARGB are the values of _imageTypeOrdinal tested in run()
+   protected static final int RGB = 1;
+   protected static final int ARGB = 2;
+
+   public static final int MAX_WINDOW_SIZE = 49; // length of _window; processImages rejects larger windows
+
+   protected int _imageTypeOrdinal;
+   protected int[] _sourcePixels; // read as _sourcePixels[x + _sourceWidth * y]
+   protected int _sourceWidth;
+   protected int _sourceHeight;
+   protected int[] _destPixels; // run() writes one result at index getGlobalId()
+   // NB could also use suffix naming instead of annotation ... field would be named _window_$private$49
+   @PrivateMemorySpace(MAX_WINDOW_SIZE) private short[] _window = new short[MAX_WINDOW_SIZE];
+   @NoCL private static ThreadLocal<short[]> _threadLocalWindow = new ThreadLocal<short[]>() {
+      @Override
+      protected short[] initialValue() {
+         return new short[MAX_WINDOW_SIZE];
+      }
+   };
+   protected int _windowWidth;
+   protected int _windowHeight;
+   /** Points _window at the calling thread's ThreadLocal buffer; @NoCL marks this as Java-only (see class comment). */
+   @NoCL
+   public void setUpWindow() {
+      _window = _threadLocalWindow.get();
+   }
+   /** Takes the window size from {@code settings} and executes over every source pixel; throws IllegalArgumentException if the window exceeds MAX_WINDOW_SIZE. */
+   public void processImages(MedianSettings settings) {
+      if ((long) settings.windowWidth * settings.windowHeight > MAX_WINDOW_SIZE) {
+         throw new IllegalArgumentException("window " + settings.windowWidth + "x" + settings.windowHeight + " exceeds MAX_WINDOW_SIZE (" + MAX_WINDOW_SIZE + ")");
+      }
+      _windowWidth = settings.windowWidth;
+      _windowHeight = settings.windowHeight;
+      execute(_sourceWidth * _sourceHeight);
+   }
+   /** Writes to _destPixels[getGlobalId()] the median (per channel for RGB/ARGB) of the image-clipped window around that pixel. */
+   @Override
+   public void run() {
+      setUpWindow();
+      int index = getGlobalId();
+      int x = index % _sourceWidth;
+      int y = index / _sourceWidth;
+
+      int dx0 = -(_windowWidth / 2); // window offsets relative to (x, y); the upper bounds dx1/dy1 are exclusive
+      int dx1 = _windowWidth + dx0;
+      int dy0 = -(_windowHeight / 2);
+      int dy1 = _windowHeight + dy0;
+
+      int windowX0 = max(0, x + dx0); // clip the window to the image bounds
+      int windowX1 = min(_sourceWidth, x + dx1);
+      int windowY0 = max(0, y + dy0);
+      int windowY1 = min(_sourceHeight, y + dy1);
+
+      int actualPixelCount = (windowX1 - windowX0) * (windowY1 - windowY0);
+      int medianPixel = 0;
+
+      if (_imageTypeOrdinal == MONOCHROME) {
+         populateWindow(CHANNEL_GRAY, windowX0, windowX1, windowY0, windowY1);
+         medianPixel = median(actualPixelCount);
+      } else {
+         int alpha = 0xff; // opaque default, kept as an 8-bit value because it is shifted into bits 24-31 below
+         if (_imageTypeOrdinal == ARGB) {
+            populateWindow(CHANNEL_ALPHA, windowX0, windowX1, windowY0, windowY1);
+            alpha = median(actualPixelCount);
+         }
+         populateWindow(CHANNEL_RED, windowX0, windowX1, windowY0, windowY1);
+         int red = median(actualPixelCount);
+         populateWindow(CHANNEL_GREEN, windowX0, windowX1, windowY0, windowY1);
+         int green = median(actualPixelCount);
+         populateWindow(CHANNEL_BLUE, windowX0, windowX1, windowY0, windowY1);
+         int blue = median(actualPixelCount);
+         medianPixel = alpha << 24 | red << 16 | green << 8 | blue;
+      }
+
+      _destPixels[index] = medianPixel;
+   }
+   /** Fills _window, from index 0, with the given channel of each source pixel in [windowX0, windowX1) x [windowY0, windowY1). */
+   protected void populateWindow(int channel, int windowX0, int windowX1, int windowY0, int windowY1) {
+      int windowIndex = 0;
+      for (int u = windowX0; u < windowX1; ++u) {
+         for (int v = windowY0; v < windowY1; ++v) {
+            int argb = _sourcePixels[u + _sourceWidth * v];
+            int sourcePixel = valueForChannel(channel, argb);
+            setPixelWindowValue(windowIndex, (short) sourcePixel);
+            ++windowIndex;
+         }
+      }
+   }
+   /** Extracts the channel named by a CHANNEL_* constant from a packed pixel; any other channel value yields 0. */
+   protected final int valueForChannel(int channel, int argb) {
+      int sourcePixel = 0;
+      if (channel == CHANNEL_GRAY) {
+         sourcePixel = argb;
+      } else if (channel == CHANNEL_ALPHA) {
+         sourcePixel = (0xff000000 & argb) >>> 24;
+      } else if (channel == CHANNEL_RED) {
+         sourcePixel = (0x00ff0000 & argb) >>> 16;
+      } else if (channel == CHANNEL_GREEN) {
+         sourcePixel = (0x0000ff00 & argb) >>> 8;
+      } else if (channel == CHANNEL_BLUE) {
+         sourcePixel = 0x000000ff & argb;
+      }
+      return sourcePixel;
+   }
+   /** Stores {@code value} at {@code windowIndex} in _window. */
+   protected void setPixelWindowValue(int windowIndex, short value) {
+      _window[windowIndex] = value;
+   }
+
+   /**
+    * Returns the median of the first actualPixelCount entries of _window, reordering them in place; fast median based on the following algorithm
+    * <pre>
+    *                   Author: Wirth, Niklaus
+    *                    Title: Algorithms + data structures = programs
+    *                Publisher: Englewood Cliffs: Prentice-Hall, 1976
+    * </pre>
+    */
+   protected final int median(int actualPixelCount) {
+      int i, j, L, m;
+      short x;
+      L = 0; // [L, m] is the index range still being partitioned
+      m = actualPixelCount - 1;
+      while (L < m) {
+         x = _window[(actualPixelCount / 2)];
+         i = L;
+         j = m;
+         do {
+            while (_window[i] < x) {
+               i++;
+            }
+            while (x < _window[j]) {
+               j--;
+            }
+            if (i <= j) {
+               short temp = _window[i];
+               _window[i] = _window[j];
+               _window[j] = temp;
+               i++;
+               j--;
+            }
+         } while (i <= j);
+
+         if (j < actualPixelCount / 2) {
+            L = i;
+         }
+         if (actualPixelCount / 2 < i) {
+            m = j;
+         }
+      }
+      return _window[(actualPixelCount / 2)];
+   }
+}
diff --git a/samples/median/src/com/amd/aparapi/sample/median/MedianSettings.java b/samples/median/src/com/amd/aparapi/sample/median/MedianSettings.java
new file mode 100644
index 00000000..0f79dc3d
--- /dev/null
+++ b/samples/median/src/com/amd/aparapi/sample/median/MedianSettings.java
@@ -0,0 +1,15 @@
+package com.amd.aparapi.sample.median;
+
+public class MedianSettings {
+   public final int windowWidth;
+   public final int windowHeight;
+   /** Creates settings for a window of the given width and height. */
+   public MedianSettings(int windowWidth, int windowHeight) {
+      this.windowHeight = windowHeight;
+      this.windowWidth = windowWidth;
+   }
+   /** Creates settings for a square window with {@code windowSize} as both width and height. */
+   public MedianSettings(int windowSize) {
+      this(windowSize, windowSize);
+   }
+}
-- 
GitLab