Commit 19ec835c authored by Gary Frost's avatar Gary Frost
Browse files

Merge of MultiDimSupport and SupportGlobalLocalMemory branches.

parent 8e6403f1
......@@ -35,6 +35,7 @@
<fileset dir="test" includes="*/build.xml"/>
</subant>
<delete dir="examples\nbody\jogamp"/> <!-- we handle the jogamp delete here, save downloading each build -->
<delete file="test\codegen\junit-4.10.jar"/> <!-- we handle the junit delete here, save downloading each build -->
<ant dir="com.amd.aparapi.jni" target="clean"/>
<ant dir="com.amd.aparapi" target="clean"/>
</target>
......
This diff is collapsed.
......@@ -8,7 +8,7 @@
<delete file="aparapi.jar"/>
</target>
<target name="build">
<target name="build" depends="clean">
<mkdir dir="classes"/>
<javac destdir="classes" debug="on" includeAntRuntime="false" >
<src path="src/java"/>
......
......@@ -2090,9 +2090,9 @@ class ClassModel{
}
public boolean isStatic() {
return (Access.STATIC.bitIsSet(methodAccessFlags));
return (Access.STATIC.bitIsSet(methodAccessFlags));
}
AttributePool getAttributePool() {
return (methodAttributePool);
}
......
......@@ -175,6 +175,7 @@ class Config{
static String instructionListenerClassName = System.getProperty(propPkgName + ".instructionListenerClass");
static public InstructionListener instructionListener = null;
{
if (instructionListenerClassName != null && !instructionListenerClassName.equals("")) {
......
/*
Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following
disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through
774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR,
you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of
Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration
Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,
E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups
D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject
to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774
of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations
under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/.
*/
package com.amd.aparapi;
/**
* Exception raised when deprecated Aparapi functionality is invoked,
* for example by <code>Kernel.globalBarrier()</code>.
*/
@SuppressWarnings("serial")
class DeprecatedException extends AparapiException{
/**
* Create the exception with a descriptive message.
*
* @param msg text describing the deprecated usage; handed unchanged to the <code>AparapiException</code> constructor
*/
DeprecatedException(String msg) {
super(msg);
}
}
......@@ -89,16 +89,17 @@ import com.amd.aparapi.ClassModel.ConstantPool.MethodReferenceEntry;
* }
* </pre></blockquote>
* <p>
* To execute this kernel, first create a new instance of it and then call <code>execute(int globalSize)</code>.
* To execute this kernel, first create a new instance of it and then call <code>execute(Range _range)</code>.
* <p>
* <blockquote><pre>
* int[] values = new int[1024];
* // fill values array
* Range range = Range.create(values.length); // create a range 0..1024
* SquareKernel kernel = new SquareKernel(values);
* kernel.execute(values.length);
* kernel.execute(range);
* </pre></blockquote>
* <p>
* When <code>execute()</code> returns, all the executions of Kernel.run() have completed and the results are available in the <code>squares</code> array.
* When <code>execute(Range)</code> returns, all the executions of <code>Kernel.run()</code> have completed and the results are available in the <code>squares</code> array.
* <p>
* <blockquote><pre>
* int[] squares = kernel.getSquares();
......@@ -110,16 +111,19 @@ import com.amd.aparapi.ClassModel.ConstantPool.MethodReferenceEntry;
* A different approach to creating kernels that avoids extending Kernel is to write an anonymous inner class:
* <p>
* <blockquote><pre>
*
* final int[] values = new int[1024];
* // fill values array
* // fill the values array
* final int[] squares = new int[values.length];
* final Range range = Range.create(values.length);
*
* Kernel kernel = new Kernel(){
* public void run() {
* int gid = getGlobalID();
* squares[gid] = values[gid]*values[gid];
* }
* };
* kernel.execute(values.length);
* kernel.execute(range);
* for (int i=0; i< values.length; i++){
* System.out.printf("%4d %4d %8d\n", i, values[i], squares[i]);
* }
......@@ -141,15 +145,52 @@ public abstract class Kernel implements Cloneable{
}
@Retention(RetentionPolicy.RUNTIME) @interface OpenCLDelegate {
}
/**
* We can use this Annotation to 'tag' intended local buffers.
*
* So we can either annotate the buffer
* <pre><code>
* &#64;Local int[] buffer = new int[1024];
* </code></pre>
* Or use a special suffix
* <pre><code>
* int[] buffer_$local$ = new int[1024];
* </code></pre>
*
* @see Kernel#LOCAL_SUFFIX
*
*
*/
public @Retention(RetentionPolicy.RUNTIME) @interface Local {
}
/**
* We can use this suffix to 'tag' intended local buffers.
*
*
* So either name the buffer
* <pre><code>
* int[] buffer_$local$ = new int[1024];
* </code></pre>
* Or use the Annotation form
* <pre><code>
* &#64;Local int[] buffer = new int[1024];
* </code></pre>
*/
final static String LOCAL_SUFFIX = "_$local$";
private static Logger logger = Logger.getLogger(Config.getLoggerName());
public abstract class Entry{
public abstract void run();
public Kernel execute(int _globalSize) {
return (Kernel.this.execute("foo", _globalSize, 1));
public Kernel execute(Range _range) {
return (Kernel.this.execute("foo", _range, 1));
}
}
......@@ -292,26 +333,30 @@ public abstract class Kernel implements Cloneable{
private EXECUTION_MODE executionMode = EXECUTION_MODE.getDefaultExecutionMode();
private int globalId;
private int localId;
private int localSize;
int[] globalId = new int[] {
0,
0,
0
};
private int globalSize;
int[] localId = new int[] {
0,
0,
0
};
private int groupId;
int[] groupId = new int[] {
0,
0,
0
};
private int passId;
Range range;
private int numGroups;
int passId;
volatile CyclicBarrier localBarrier;
void setGlobalId(int _globalId) {
globalId = _globalId;
}
/**
* Determine the globalId of an executing kernel.
* <p>
......@@ -349,19 +394,26 @@ public abstract class Kernel implements Cloneable{
*/
@OpenCLDelegate protected final int getGlobalId() {
return (globalId);
return (getGlobalId(0));
}
void setGroupId(int _groupId) {
groupId = _groupId;
/**
* Determine the globalId of an executing kernel in the requested dimension.
* <p>
* The no-argument <code>getGlobalId()</code> is equivalent to <code>getGlobalId(0)</code>.
*
* @param _dim index into the three-element <code>globalId</code> array (0, 1 or 2); any other value raises <code>ArrayIndexOutOfBoundsException</code>
* @return the value currently stored in <code>globalId[_dim]</code>
*/
@OpenCLDelegate protected final int getGlobalId(int _dim) {
return (globalId[_dim]);
}
void setPassId(int _passId) {
passId = _passId;
/*
@OpenCLDelegate protected final int getGlobalX() {
return (getGlobalId(0));
}
}
@OpenCLDelegate protected final int getGlobalY() {
return (getGlobalId(1));
}
@OpenCLDelegate protected final int getGlobalZ() {
return (getGlobalId(2));
}
*/
/**
* Determine the groupId of an executing kernel.
* <p>
......@@ -394,9 +446,26 @@ public abstract class Kernel implements Cloneable{
* @return The groupId for this Kernel being executed
*/
@OpenCLDelegate protected final int getGroupId() {
return (groupId);
return (getGroupId(0));
}
/**
* Determine the groupId of an executing kernel in the requested dimension.
* <p>
* The no-argument <code>getGroupId()</code> is equivalent to <code>getGroupId(0)</code>.
*
* @param _dim index into the three-element <code>groupId</code> array (0, 1 or 2); any other value raises <code>ArrayIndexOutOfBoundsException</code>
* @return the value currently stored in <code>groupId[_dim]</code>
*/
@OpenCLDelegate protected final int getGroupId(int _dim) {
return (groupId[_dim]);
}
/*
@OpenCLDelegate protected final int getGroupX() {
return (getGroupId(0));
}
@OpenCLDelegate protected final int getGroupY() {
return (getGroupId(1));
}
@OpenCLDelegate protected final int getGroupZ() {
return (getGroupId(2));
}
*/
/**
* Determine the passId of an executing kernel.
* <p>
......@@ -416,10 +485,6 @@ public abstract class Kernel implements Cloneable{
return (passId);
}
void setLocalId(int _localId) {
localId = _localId;
}
/**
* Determine the local id of an executing kernel.
* <p>
......@@ -451,9 +516,26 @@ public abstract class Kernel implements Cloneable{
* @return The local id for this Kernel being executed
*/
@OpenCLDelegate protected final int getLocalId() {
return (localId);
return (getLocalId(0));
}
/**
* Determine the local id of an executing kernel in the requested dimension.
* <p>
* The no-argument <code>getLocalId()</code> is equivalent to <code>getLocalId(0)</code>.
*
* @param _dim index into the three-element <code>localId</code> array (0, 1 or 2); any other value raises <code>ArrayIndexOutOfBoundsException</code>
* @return the value currently stored in <code>localId[_dim]</code>
*/
@OpenCLDelegate protected final int getLocalId(int _dim) {
return (localId[_dim]);
}
/*
@OpenCLDelegate protected final int getLocalX() {
return (getLocalId(0));
}
@OpenCLDelegate protected final int getLocalY() {
return (getLocalId(1));
}
@OpenCLDelegate protected final int getLocalZ() {
return (getLocalId(2));
}
*/
/**
* Determine the size of the group that an executing kernel is a member of.
* <p>
......@@ -472,9 +554,26 @@ public abstract class Kernel implements Cloneable{
* @return The size of the currently executing group.
*/
@OpenCLDelegate protected final int getLocalSize() {
return (localSize);
return (range.getLocalSize(0));
}
/**
* Determine the size of the group that an executing kernel is a member of, in the requested dimension.
* <p>
* The value is read from the kernel's current <code>range</code> field; the no-argument
* <code>getLocalSize()</code> asks the same question for dimension 0.
*
* @param _dim the dimension to query (presumably 0, 1 or 2 to match the three-element id arrays; confirm against <code>Range</code>)
* @return whatever <code>range.getLocalSize(_dim)</code> reports
*/
@OpenCLDelegate protected final int getLocalSize(int _dim) {
return (range.getLocalSize(_dim));
}
/*
@OpenCLDelegate protected final int getLocalWidth() {
return (range.getLocalSize(0));
}
@OpenCLDelegate protected final int getLocalHeight() {
return (range.getLocalSize(1));
}
@OpenCLDelegate protected final int getLocalDepth() {
return (range.getLocalSize(2));
}
*/
/**
* Determine the value that was passed to <code>Kernel.execute(int globalSize)</code> method.
*
......@@ -486,14 +585,26 @@ public abstract class Kernel implements Cloneable{
* @return The value passed to <code>Kernel.execute(int globalSize)</code> causing the current execution.
*/
@OpenCLDelegate protected final int getGlobalSize() {
return (globalSize);
return (range.getGlobalSize(0));
}
void setNumGroups(int _numGroups) {
numGroups = _numGroups;
/**
* Determine the global size of the current execution in the requested dimension.
* <p>
* The value is read from the kernel's current <code>range</code> field; the no-argument
* <code>getGlobalSize()</code> asks the same question for dimension 0.
*
* @param _dim the dimension to query (presumably 0, 1 or 2 to match the three-element id arrays; confirm against <code>Range</code>)
* @return whatever <code>range.getGlobalSize(_dim)</code> reports
*/
@OpenCLDelegate protected final int getGlobalSize(int _dim) {
return (range.getGlobalSize(_dim));
}
/*
@OpenCLDelegate protected final int getGlobalWidth() {
return (range.getGlobalSize(0));
}
@OpenCLDelegate protected final int getGlobalHeight() {
return (range.getGlobalSize(1));
}
@OpenCLDelegate protected final int getGlobalDepth() {
return (range.getGlobalSize(2));
}
*/
/**
* Determine the number of groups that will be used to execute a kernel
* <p>
......@@ -509,9 +620,26 @@ public abstract class Kernel implements Cloneable{
* @return The number of groups that kernels will be dispatched into.
*/
@OpenCLDelegate protected final int getNumGroups() {
return (numGroups);
return (range.getNumGroups(0));
}
/**
* Determine the number of groups that will be used to execute a kernel, in the requested dimension.
* <p>
* The value is read from the kernel's current <code>range</code> field; the no-argument
* <code>getNumGroups()</code> asks the same question for dimension 0.
*
* @param _dim the dimension to query (presumably 0, 1 or 2 to match the three-element id arrays; confirm against <code>Range</code>)
* @return whatever <code>range.getNumGroups(_dim)</code> reports
*/
@OpenCLDelegate protected final int getNumGroups(int _dim) {
return (range.getNumGroups(_dim));
}
/*
@OpenCLDelegate protected final int getNumGroupsWidth() {
return (range.getGroups(0));
}
@OpenCLDelegate protected final int getNumGroupsHeight() {
return (range.getGroups(1));
}
@OpenCLDelegate protected final int getNumGroupsDepth() {
return (range.getGroups(2));
}
*/
/**
* The entry point of a kernel.
*
......@@ -529,9 +657,21 @@ public abstract class Kernel implements Cloneable{
@Override protected Object clone() {
try {
Kernel worker = (Kernel) super.clone();
// if there are any private buffers, go thru the fields here
// and allocate a new instance for each clone
worker.groupId = new int[] {
0,
0,
0
};
worker.localId = new int[] {
0,
0,
0
};
worker.globalId = new int[] {
0,
0,
0
};
return worker;
} catch (CloneNotSupportedException e) {
// TODO Auto-generated catch block
......@@ -1373,23 +1513,12 @@ public abstract class Kernel implements Cloneable{
* Java version is identical to localBarrier()
*
* @annotion Experimental
* @deprecated
*/
@OpenCLDelegate @Annotations.Experimental protected final void globalBarrier() {
try {
localBarrier.await();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (BrokenBarrierException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
final void setSizes(int _globalSize, int _localSize) {
localSize = _localSize;
globalSize = _globalSize;
@OpenCLDelegate @Annotations.Experimental @Deprecated protected final void globalBarrier() throws DeprecatedException {
// This method no longer synchronizes anything: every call fails immediately with a
// DeprecatedException so that callers relying on the old behaviour find out at once.
throw new DeprecatedException(
"Kernel.globalBarrier() has been deprecated. It was based on an incorrect understanding of OpenCL functionality.");
}
......@@ -1441,23 +1570,38 @@ public abstract class Kernel implements Cloneable{
}
/**
* Start execution of <code>globalSize</code> kernels.
* Start execution of <code>_range</code> kernels.
* <p>
* When <code>kernel.execute(globalSize)</code> is invoked, Aparapi will schedule the execution of <code>globalSize</code> kernels. If the execution mode is GPU then
* the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
* <p>
* @param _globalSize The number of Kernels that we would like to initiate.
* @param _range The range of Kernels that we would like to initiate.
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(Range _range) {
return (execute(_range, 1));
}
/**
* Start execution of <code>_range</code> kernels.
* <p>
* When <code>kernel.execute(_range)</code> is invoked, Aparapi will schedule the execution of <code>_range</code> kernels. If the execution mode is GPU then
* the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
* <p>
* Since adding the new <code>Range class</code> this method offers backward compatibility and merely defers to <code> return (execute(Range.create(_range), 1));</code>.
* @param _range The number of Kernels that we would like to initiate.
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(int _globalSize) {
return (execute(_globalSize, 1));
public synchronized Kernel execute(int _range) {
return (execute(Range.create(_range), 1));
}
/**
* Start execution of <code>_passes</code> iterations of <code>globalSize</code> kernels.
* Start execution of <code>_passes</code> iterations of <code>_range</code> kernels.
* <p>
* When <code>kernel.execute(globalSize, passes)</code> is invoked, Aparapi will schedule the execution of <code>globalSize</code> kernels. If the execution mode is GPU then
* When <code>kernel.execute(_range, _passes)</code> is invoked, Aparapi will schedule the execution of <code>_range</code> kernels. If the execution mode is GPU then
* the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
* <p>
* @param _range The range of Kernels that we would like to initiate.
......@@ -1465,8 +1609,23 @@ public abstract class Kernel implements Cloneable{
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(int _globalSize, int _passes) {
return (execute("run", _globalSize, _passes));
public synchronized Kernel execute(Range _range, int _passes) {
return (execute("run", _range, _passes));
}
/**
* Start execution of <code>_passes</code> iterations over the <code>_range</code> of kernels.
* <p>
* When <code>kernel.execute(_range, _passes)</code> is invoked, Aparapi will schedule the execution of <code>_range</code> kernels. If the execution mode is GPU then
* the kernels will execute as OpenCL code on the GPU device. Otherwise, if the mode is JTP, the kernels will execute as a pool of Java threads on the CPU.
* <p>
* Since adding the new <code>Range class</code> this method offers backward compatibility and merely defers to <code> return (execute(Range.create(_range), _passes));</code>.
* @param _range The number of Kernels that we would like to initiate.
* @param _passes The number of iterations (passes) to execute over the range.
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(int _range, int _passes) {
return (execute(Range.create(_range), _passes));
}
/**
......@@ -1480,12 +1639,12 @@ public abstract class Kernel implements Cloneable{
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(Entry _entry, int _globalSize) {
public synchronized Kernel execute(Entry _entry, Range _range) {
if (kernelRunner == null) {
kernelRunner = new KernelRunner(this);
}
return (kernelRunner.execute(_entry, _globalSize, 1));
return (kernelRunner.execute(_entry, _range, 1));
}
/**
......@@ -1499,8 +1658,8 @@ public abstract class Kernel implements Cloneable{
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(String _entrypoint, int _globalSize) {
return (execute(_entrypoint, _globalSize, 1));
public synchronized Kernel execute(String _entrypoint, Range _range) {
return (execute(_entrypoint, _range, 1));
}
......@@ -1515,12 +1674,12 @@ public abstract class Kernel implements Cloneable{
* @return The Kernel instance (this) so we can chain calls to put(arr).execute(range).get(arr)
*
*/
public synchronized Kernel execute(String _entrypoint, int _globalSize, int _passes) {
public synchronized Kernel execute(String _entrypoint, Range _range, int _passes) {
if (kernelRunner == null) {
kernelRunner = new KernelRunner(this);
}
return (kernelRunner.execute(_entrypoint, _globalSize, _passes));
return (kernelRunner.execute(_entrypoint, _range, _passes));
}
......@@ -1693,11 +1852,6 @@ public abstract class Kernel implements Cloneable{
return (false);
}
void setLocalSize(int _localSize) {
localSize = _localSize;
}
// the flag useNullForLocalSize is useful for testing that what we compute for localSize is what OpenCL
// would also compute if we passed in null. In non-testing mode, we just call execute with the
// same localSize that we computed in getLocalSizeJNI. We don't want do publicize these of course.
......
......@@ -43,10 +43,11 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.amd.aparapi.ClassModel.ClassModelField;
import com.amd.aparapi.ClassModel.AttributePool.LocalVariableTableEntry;
import com.amd.aparapi.ClassModel.AttributePool.LocalVariableTableEntry.LocalVariableInfo;
import com.amd.aparapi.ClassModel.AttributePool.RuntimeAnnotationsEntry;
import com.amd.aparapi.ClassModel.ClassModelField;
import com.amd.aparapi.ClassModel.AttributePool.LocalVariableTableEntry.LocalVariableInfo;
import com.amd.aparapi.ClassModel.AttributePool.RuntimeAnnotationsEntry.AnnotationInfo;
import com.amd.aparapi.ClassModel.ConstantPool.FieldEntry;