From 6979d29c680d223320391476b823420b181d2226 Mon Sep 17 00:00:00 2001 From: Mohamed Ibrahim <mibrahim@mibrahim.net> Date: Sat, 29 Aug 2015 09:00:41 -0400 Subject: [PATCH] doc/Attribution.md doc/DevelopersGuideLinux.md doc/DevelopersGuideWindows.md doc/EmulatingMultipleEntrypointsUsingCurrentAPI.md doc/MultipleEntryPointSupportProposal.md --- doc/Attribution.md | 26 ++ doc/DevelopersGuideLinux.md | 181 +++++++++ doc/DevelopersGuideWindows.md | 187 +++++++++ ...atingMultipleEntrypointsUsingCurrentAPI.md | 226 +++++++++++ doc/MultipleEntryPointSupportProposal.md | 377 ++++++++++++++++++ doc/README.md | 6 +- 6 files changed, 1000 insertions(+), 3 deletions(-) create mode 100644 doc/Attribution.md create mode 100644 doc/DevelopersGuideLinux.md create mode 100644 doc/DevelopersGuideWindows.md create mode 100644 doc/EmulatingMultipleEntrypointsUsingCurrentAPI.md create mode 100644 doc/MultipleEntryPointSupportProposal.md diff --git a/doc/Attribution.md b/doc/Attribution.md new file mode 100644 index 00000000..52ab3813 --- /dev/null +++ b/doc/Attribution.md @@ -0,0 +1,26 @@ +#Attribution +*Attribution Updated Sep 13, 2011 by frost.g...@gmail.com* + +##Attribution + +AMD, AMD Radeon, the AMD arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. + +OpenCL is a trademark of Apple Inc used under license to the Khronos Group, Inc. + +NVIDIA, the NVIDIA logo, and CUDA are trademarks or registered trademarks of NVIDIA Corporation. + +Java , JVM, JDK and “Write Once, Run Anywhere" are trademarks of Oracle and/or its affiliates. + +Eclipse and the related logos are a trademark of The Eclipse Foundation in the United States, other countries, or both. + +Microsoft, Windows, Visual Studio, Visual Studio Express Edition are trademarks of Microsoft Corporation in the United States, other countries, or both. + +Linux is a registered trademark of Linus Torvalds + +Ubuntu is a trademark of Canonical Ltd + +Red Hat is a registered trademark of Red Hat, Inc. 
in the United States and other countries. + +OpenGL® and the oval logo are trademarks or registered trademarks of Silicon Graphics, Inc. in the United States and/or other countries worldwide. + +All other names used in this documentation are for identification purposes only and may be trademarks of their respective owners. diff --git a/doc/DevelopersGuideLinux.md b/doc/DevelopersGuideLinux.md new file mode 100644 index 00000000..c14da3a2 --- /dev/null +++ b/doc/DevelopersGuideLinux.md @@ -0,0 +1,181 @@ +#DevelopersGuideLinux + +*Developer guide for Linux. Updated Aug 23, 2012 by frost.g...@gmail.com* + +#Aparapi Developer Guide: Linux® 32- and 64-bit platforms + +##SVN Client + +To contribute to Aparapi you will need an SVN client to access the latest source code. This page lists a number of SVN client providers [http://subversion.apache.org/packages.html](http://subversion.apache.org/packages.html) Also you might want to consider one of the SVN-based plugins for Eclipse®. http://wiki.eclipse.org/SVN_Howto +OpenJDK or Oracle® Java JDK install (JDK1.6 or later) + +http://OpenJDK.java.net http://www.oracle.com/technetwork/java/javase/downloads/index.html + +Many Linux® distributions come with Java JDK pre-installed or available as an optional install component. Sometimes the version that comes pre-installed is GCJ (http://gcc.gnu.org/java/). For Aparapi you will need to ensure that you have a copy of the JDK from either the OpenJDK project or from Oracle®. + +The Oracle® J2SE JDK site contains downloads and documentation showing how to install for various Linux distributions. + +http://www.oracle.com/technetwork/java/javase/index-137561.html + +Here is an example for my Ubuntu system: + + $ sudo apt-get install sun-java6-jdk sun-java6-jre + +When the installation is complete, ensure that your JAVA_HOME environment variable is pointing to the install location (such as /usr/lib/jvm/java-6-sun-1.6.0.26). 
+ + $ export JAVA_HOME=/usr/lib/jvm/java-6-sun-1.6.0.26 + +You should also add ${JAVA_HOME}/bin to your path. + + $ export PATH=${PATH}:${JAVA_HOME}/bin + +Double-check your path and ensure that there is not another JDK/JRE in your path. + + $ java -version + java version "1.6.0_26" + Java(TM) SE Runtime Environment (build 1.6.0_26-b03) + Java HotSpot(TM) Client VM (build 20.1-b02, mixed mode, sharing) + +##Apache Ant + +Apache Ant® can be downloaded from the apache project page http://ant.apache.org + +Aparapi has been tested using version 1.7.1 of Ant. It may work with earlier versions, but if you encounter issues we recommend updating to at least 1.7.1 before reporting issues. Here is an example for installing Ant on Ubuntu: + + $ apt-get install ant + +Ensure that ANT_HOME is set to the install dir. + + $ export ANT_HOME=/usr/local/ant + +Add `${ANT_HOME}/bin` to your path. + + $ export PATH=${PATH}:${ANT_HOME}/bin + +Double-check the installation and environment vars. + + ant -version + Apache Ant version 1.7.1 compiled ... + +##AMD APP SDK + +To compile Aparapi JNI code you need access to OpenCL headers and libraries. The instructions below assume that there is an available AMD APP SDK v2.5® (or later) installed and that your platform supports the required device drivers for your GPU card. Install the Catalyst driver first, and then install AMD APP SDK v2.5® or later. + +See http://developer.amd.com/sdks/AMDAPPSDK/pages/DriverCompatibility.aspx for help locating the appropriate driver for your AMD card. Make sure you install the catalyst driver that includes the OpenCL™ runtime components. + + The OpenCL™ runtime is required for executing Aparapi or OpenCL™ on your CPU or GPU, but it is not necessary for building/compiling Aparapi. + The AMD APP SDK v2.5 is necessary for compiling the Aparapi JNI code against OpenCL™ APIs.
+ +Once you have a suitable driver, download a copy of AMD APP SDK v2.5 or later from http://developer.amd.com/sdks/AMDAPPSDK/downloads/Pages/default.aspx. + +Download the installation guide for Microsoft® Windows® (and Linux®) from http://developer.amd.com/sdks/AMDAPPSDK/assets/AMD_APP_SDK_Installation_Notes.pdf. Note that if you are updating from a previous version of AMD APP SDK (or its predecessor ATI STREAM SDK), first uninstall the previous version. + +Download the release notes from: http://developer.amd.com/sdks/AMDAPPSDK/assets/AMD_APP_SDK_Release_Notes_Developer.pdf +GCC compiler (G++) for your Linux 32-bit or 64-bit platform + +Aparapi has been tested with GCC 4.1.2 or later on 32-bit and 64-bit Linux. + +Ensure you have the g++ toolchain installed: + + $ g++ + no input files + +##JUnit + +The initial Open Source drop includes a suite of JUnit tests for validating bytecode to OpenCL™ code generation. These tests require JUnit 4. + +Download JUnit from http://www.junit.org/ and note the location of your JUnit installation; the location is needed to configure the test\codegen\build.xml file. Please see the UnitTestGuide page. + +##Eclipse + +Eclipse is not required to build Aparapi; however, the developers of Aparapi do use Eclipse and have made the Eclipse artifacts (.classpath and .project files) available so that projects can be imported into Eclipse. The com.amd.aparapi.jni subproject (containing C++ JNI source) should be imported as a resource project. We do not recommend importing com.amd.aparapi.jni as a CDT project, and we do not recommend trying to configure a CDT build; the existing build.xml files have been customized for multiplatform C++ compilations.
+ +##Building + +Check out the Aparapi SVN trunk: + + $ svn checkout http://aparapi.googlecode.com/svn/trunk aparapi + +Checkout provides the following: + + aparapi/ + com.amd.aparapi/ + src/java/com.amd.aparapi/*.java + build.xml + com.amd.aparapi.jni/ + src/cpp/*.cpp + src/cpp/*.h + build.xml + test/ + codegen/ + src/java/ + com.amd.aparapi/ + com.amd.aparapi.test/ + build.xml + runtime/ + src/java/ + com.amd.aparapi/ + com.amd.aparapi.test/ + build.xml + samples/ + mandel + src/java/com.amd.aparapi.samples.mandel/*.java + build.xml + mandel.sh + mandel.bat + squares/ + src/java/com.amd.aparapi.samples.squares/*.java + build.xml + squares.sh + squares.bat + convolution/ + src/java/com.amd.aparapi.samples.convolution/*.java + build.xml + conv.sh + conv.bat + examples/ + nbody/ + src/java/com.amd.aparapi.nbody/ + build.xml + nbody.sh + nbody.bat + build.xml + README.txt + LICENSE.txt + CREDITS.txt + +##Sub Directories + +The com.amd.aparapi and com.amd.aparapi.jni subdirectories contain the source for building and using Aparapi. + +The ant build.xml files in each folder accept the common 'clean' and 'build' targets. You can use the build.xml file at the root of the tree for two purposes: + + To initiate a build of com.amd.aparapi and com.amd.aparapi.jni. + To create a binary ‘distribution’ directory and zip file. This zip file is the same as those available from the download section of the code.google.com/p/aparapi site. + +##Preparing for your first build + +Edit com.amd.aparapi.jni\build.properties and ensure that the properties are valid for your platform. + +View the comments in the properties file for assistance. The build.xml ant file contains some simple checks to help diagnose simple configuration errors in case something gets messed up. + +For Linux you should not need to edit build.xml unless your APP SDK install location differs from the default.
The default for Linux® is /opt/AMDAPP + + amd.app.sdk.dir=/opt/AMDAPP + +Perform a build from the root directory using the following command: + + $ ant clean build dist + +Once your build has completed you should see an additional subdirectory named dist_linux_x86 or dist_linux_x86_64 (depending on the bitness of your platform). + +The distribution directory contains: + + aparapi.jar containing Aparapi classes for all platforms. + the shared library for your platform (aparapi_x86.so or aparapi_x86_64.so). + an /api subdirectory containing the 'public' javadoc for Aparapi. + a samples directory containing the source and binaries for the mandel and squares sample projects. + +The root directory also contains either dist_linux_x86_64.zip or dist_linux_x86.zip containing a compressed archive of the distribution tree. + +[Attribution](Attribution.md) diff --git a/doc/DevelopersGuideWindows.md b/doc/DevelopersGuideWindows.md new file mode 100644 index 00000000..dd0c386f --- /dev/null +++ b/doc/DevelopersGuideWindows.md @@ -0,0 +1,187 @@ +#DevelopersGuideWindows +*Developers guide for Windows. Updated Aug 23, 2012 by frost.g...@gmail.com* + +##Aparapi Developer Guide: Microsoft® Windows® 32- and 64-bit platforms + +##SVN Client + +To contribute to Aparapi you will need an SVN client to access the latest source code. + +This page lists a number of SVN client providers http://subversion.apache.org/packages.html + +For Microsoft Windows® users TortoiseSVN incorporates SVN functionality directly into Windows Explorer view and is often preferred http://tortoisesvn.tigris.org/ + +Also you might want to consider one of the SVN-based plugins for Eclipse. http://wiki.eclipse.org/SVN_Howto +Oracle® Java JDK install (JDK1.6 or later) + +http://www.oracle.com/technetwork/java/javase/downloads/index.html + +The Oracle® J2SE JDK site contains downloads and documentation showing how to install for various platforms. 
http://www.oracle.com/technetwork/java/javase/index-137561.html + +When the installation is complete, ensure that your JAVA_HOME environment variable is pointing to the install location (such as c:\progra~1\java\jdk1.6.0_26) and that %JAVA_HOME%\bin is in your path. + + C:> set JAVA_HOME=c:\progra~1\java\jdk1.6.0_26 + C:> set PATH=%PATH%;%JAVA_HOME%\bin + +Note that we tend to use the 8.3 form of Microsoft® Windows® path variables; this avoids us having to quote paths in scripts. + +Double check your path and ensure that there is not another JDK/JRE in your path. + + C:> java -version + java version "1.6.0_26" + Java(TM) SE Runtime Environment (build 1.6.0_26-b03) + Java HotSpot(TM) Client VM (build 20.1-b02, mixed mode, sharing) + +##Apache Ant + +Apache Ant™ can be downloaded from the apache project page http://ant.apache.org + +Aparapi has been tested using version 1.7.1 of Ant. It may well work with earlier versions, but if you encounter issues we recommend updating to at least 1.7.1 before reporting issues. Installation is straightforward: just unzip the ant.zip file and ensure that your `ANT_HOME` environment variable is pointing to your ANT installation and that `%ANT_HOME%\bin` is in your path. + + C:> set ANT_HOME=C:\progra~1\apache\apache-ant-1.8.1 + C:> set PATH=%PATH%;%ANT_HOME%\bin + +Double check the installation and environment vars. + + ant -version + Apache Ant version 1.7.1 compiled .. + +##AMD APP SDK + +To compile Aparapi JNI code you need access to OpenCL headers and libraries. The instructions below assume that there is an available AMD APP SDK v2.5 (or later) installed and that your platform supports the required device drivers for your GPU card. Install the Catalyst driver first, and then install AMD APP SDK v2.5. + +See http://developer.amd.com/sdks/AMDAPPSDK/pages/DriverCompatibility.aspx for help locating the appropriate driver for your AMD card. Be sure you obtain the catalyst driver that includes the OpenCL™ runtime components.
+ + The OpenCL™ runtime is required for executing Aparapi or OpenCL™ on your CPU or GPU, but it is not necessary for building/compiling Aparapi. + The AMD APP SDK v2.5 is necessary for compiling the Aparapi JNI code against OpenCL™ APIs. + +Once you have a suitable driver, download a copy of AMD APP SDK v2.5 from http://developer.amd.com/sdks/AMDAPPSDK/downloads/Pages/default.aspx. + +Download the installation guide for Microsoft® Windows® (and Linux®) from http://developer.amd.com/sdks/AMDAPPSDK/assets/AMD_APP_SDK_Installation_Notes.pdf. Note that if you updating from a previous version of AMD APP SDK (or its predecessor ATI STREAM SDK), first uninstall the previous version. The release notes are available here http://developer.amd.com/sdks/AMDAPPSDK/assets/AMD_APP_SDK_Release_Notes_Developer.pdf +##A C++ compiler + +For Microsoft® Windows® platforms the JNI build can support either Microsoft® Visual Studio® 2008, 2009 or 2010 compiler or MinGW (Minimal GNU for Windows) from GNU. Now that Visual Studio express is available for free, we would recommend using Visual studio. If you wish to use another compiler then you will have to tweak the com.amd.aparapi.jni/build.xml file to get your compiler to work. +Microsoft® Visual Studio® 2008/2010 for 32-bit or 64-bit platforms + +Aparapi has been tested with various versions of Microsoft® Visual Studio® 2008, 2009 and 2010 including Enterprise, Professional and Express editions, if you encounter any version specific issues please let us know so we can address it and/or update this documentation. + +If you already have Microsoft® Visual Studio® installed you will need to know the location of the compiler and the SDK. These can vary depending upon the platform and version you are using. Typically an install results in a Visual Studio install, such as. c:\Program Files\Microsoft Visual Studio 9.0 + +And an SDK, such as. 
c:\Program Files\Microsoft SDKs\Windows\v6.0A + +Note the location of both of these as this information will be needed to configure the com.amd.aparapi.jni\build.properties file (later). +For Visual Studio Express 64 bit users + +Visual Studio Express does not include the 64 bit compiler or libraries. You will also need to install the SDK from Microsoft. This link should help +##MinGW – (Minimalist GNU for Windows) + +As an alternative to installing Microsoft® Visual Studio® we have included support for the MinGW tool chain and Aparapi has been (minimally) tested with this compiler. + +MinGW can be downloaded from http://www.mingw.org/ by following the instructions on their Getting Started page. We recommend installing the mingw-get-inst msi installer and just taking the defaults. + +Note the install location as this information will be needed to edit the build.xml file and uncomment the line referencing the mingw install dir. Typically the install location is + + C:\MinGW + +After a successful build, you will need to ensure that the bin sub directory is in your path before attempting to run an Aparapi enabled application built using MinGW. MinGW apps require access to the MinGW/GNU C++/C runtime at execution time. + + set PATH=%PATH%;C:\MinGW\bin + +This is one reason the binary distribution is *not* built using mingw. +##JUnit + +The initial Open Source drop includes a suite of JUnit tests for validating bytecode to OpenCL code generation. These tests require JUnit 4. + +Download JUnit from http://www.junit.org/ + +Note the location of your JUnit installation; the location is needed to configure the test\codegen\build.xml file. See the UnitTestGuide page for how to configure the JUnit build. +##Eclipse + +Eclipse is not required to build Aparapi; however, the developers of Aparapi do use Eclipse and have made the Eclipse artifacts (.classpath and .project files) available so that projects can be imported into Eclipse.
+ +The com.amd.aparapi.jni subproject (containing C++ JNI source) should be imported as a resource project, we do not recommend importing com.amd.aparapi.jni as a CDT project, and we do not recommend trying to configure a CDT build, the existing build.xml files has been customized for multiplatform C++ compilations. +##Building + +Check out the Aparapi SVN trunk: + +svn checkout http://aparapi.googlecode.com/svn/trunk + +You will end up with the following files/directories + + aparapi/ + com.amd.aparapi/ + src/java/com.amd.aparapi/*.java + build.xml + com.amd.aparapi.jni/ + src/cpp/*.cpp + src/cpp/*.h + build.xml + test/ + codegen/ + src/java/ + com.amd.aparapi/ + com.amd.aparapi.test/ + build.xml + runtime/ + src/java/ + com.amd.aparapi/ + com.amd.aparapi.test/ + build.xml + samples/ + mandel + src/java/com.amd.aparapi.samples.mandel/*.java + build.xml + mandel.sh + mandel.bat + squares/ + src/java/com.amd.aparapi.samples.squares/*.java + build.xml + squares.sh + squares.bat + convolution/ + src/java/com.amd.aparapi.samples.convolution/*.java + build.xml + conv.sh + conv.bat + examples/ + nbody/ + src/java/com.amd.aparapi.nbody/ + build.xml + nbody.sh + nbody.bat + build.xml + README.txt + LICENSE.txt + CREDITS.txt + +##Sub Directories + +The com.amd.aparapi and com.amd.aparapi.jni subdirectories contain the source for building and using Aparapi. + +The ant build.xml file, in each folder accept 'clean' and 'build' targets. + +Use the build.xml file at the root of the tree for two purposes: + + To initiate a build of com.amd.aparapi and com.amd.aparapi.jni. + To create a binary distribution directory and zip file. This zip file is same as those available from the download section of the code.google.com/p/aparapi site. + +##Preparing for your first build + +You should only need to edit com.amd.aparapi.jni\build.xml file if you wish to use mingw or if you Visual Studio or gcc compiler is in an unusual place. 
+ +Perform a build from the root directory using the following command: + + $ ant clean dist + +The jni build will perform some simple tests to check the configuration properties and hopefully also guide you to a possible solution. + +Once your build has completed you should see an additional subdirectory named dist_windows_x86 or dist_windows_x86_64 (depending upon your platform type). + + aparapi.jar containing Aparapi classes for all platforms. + the shared library for your platform (aparapi_x86.dll or aparapi_x86_64.dll). + an /api subdirectory containing the 'public' javadoc for Aparapi. + a samples directory containing the source and binaries for the mandel and squares sample projects. + +The root directory also contains either dist_windows_x86_64.zip or dist_windows_x86.zip containing a compressed archive of the distribution tree. + +[Attribution](Attribution.md) diff --git a/doc/EmulatingMultipleEntrypointsUsingCurrentAPI.md b/doc/EmulatingMultipleEntrypointsUsingCurrentAPI.md new file mode 100644 index 00000000..b34051f5 --- /dev/null +++ b/doc/EmulatingMultipleEntrypointsUsingCurrentAPI.md @@ -0,0 +1,226 @@ +#EmulatingMultipleEntrypointsUsingCurrentAPI +*How to emulate multiple entrypoints using existing Aparapi APIs Updated Jul 30, 2012 by frost.g...@gmail.com* + +##Emulating Multiple Entrypoints Using Existing Aparapi APIs + +Until we have support for multiple entrypoints in Aparapi, there are some tricks for emulating this feature. + +Follow the proposal for adding multiple entrypoints on this page [MultipleEntryPointSupportProposal](MultipleEntryPointSupportProposal.md). + +Suppose we wanted to create a general VectorMath kernel which might expose unary square, squareroot methods and binary addition and subtraction functionality. With our current API limitations we can't easily do this, we can approximate having separate methods by passing a separate arg to dictate the 'function' that we wish to perform. 
+ + class VectorKernel extends Kernel{ + float[] lhsOperand; + float[] rhsOperand; + float[] unaryOperand; + float[] result; + final static int FUNC_ADD =0; + final static int FUNC_SUB =1; + final static int FUNC_SQR =2; + final static int FUNC_SQRT =3; + // other functions + int function; + @Override public void run(){ + int gid = getGlobalId(0){ + if (function==FUNC_ADD){ + result[gid]=lhsOperand[gid]+rhsOperand[gid]; + }else if (function==FUNC_SUB){ + result[gid]=lhsOperand[gid]-rhsOperand[gid]; + }else if (function==FUNC_SQR){ + result[gid]=unaryOperand[gid]*unaryOperand[gid]; + }else if (function==FUNC_ADD){ + result[gid]=sqrt(unaryOperand[gid]); + }else if .... + } + } + +To use this for adding two vectors and then taking the sqrt of the result we would use something like.... + + int SIZE=1024; + Range range = Range.create(SIZE); + VectorKernel vk = new VectorKernel(); + vk.lhsOperand = new float[SIZE]; + vk.rhsOperand = new float[SIZE]; + vk.unaryOperand = new float[SIZE]; + vk.result = new float[SIZE]; + + // fill lhsOperand ommitted + // fill rhsOperand ommitted + vk.function = VectorKernel.FUNC_ADD; + vk.execute(range); + System.arrayCopy(vk.result, 0, vk.unaryOperand, 0, SIZE); + vk.function = VectorKernel.FUNC_SQRT; + vk.execute(range); + +This approach is fairly common and I have used it successfully to perform various pipeline stages for calculating FFTs, for example. Whilst this is functional it is not a great solution. First the API is clumsy. We have to mutate the state of the kernel instance and then re-arrange the arrays manually to chain math operations. We could of course hide all of this behind helper methods. One could imagine for example an implementation which exposes helper methods such as `add(lhs, rhs)` or `sqrt()` which hide all the nasty stuff.
+ + class VectorKernel extends Kernel{ + float[] lhsOperand; + float[] rhsOperand; + float[] unaryOperand; + float[] result; + final static int FUNC_ADD =0; + final static int FUNC_SUB =1; + final static int FUNC_SQR =2; + final static int FUNC_SQRT =3; + // other functions + int function; + @Override public void run(){ + int gid = getGlobalId(0){ + if (function==FUNC_ADD){ + result[gid]=lhsOperand[gid]+rhsOperand[gid]; + }else if (function==FUNC_SUB){ + result[gid]=lhsOperand[gid]-rhsOperand[gid]; + }else if (function==FUNC_SQR){ + result[gid]=unaryOperand[gid]*unaryOperand[gid]; + }else if (function==FUNC_ADD){ + result[gid]=sqrt(unaryOperand[gid]); + }else if .... + } + private void binary(int operator, float[] lhs, float[] rhs){ + lhsOperand = lhs; + rhsOperand = rhs; + function=operator; + execute(lhs.length()); + } + public void add(float[] lhs, float[] rhs){ + binary(FUNC_ADD, lhs, rhs); + } + + public void sub(float[] lhs, float[] rhs){ + binary(FUNC_SUB, lhs, rhs); + } + + private void binary(int operator, float[] rhs){ + System.arrayCopy(result, 0, lhsOperand, result.length); + rhsOperand = rhs; + function=operator; + execute(lhsOperand.legth()); + } + + public void add(float[] rhs){ + binary(FUNC_ADD, rhs); + } + + public void sub( float[] rhs){ + binary(FUNC_SUB, rhs); + } + + private void unary(int operator, float[] unary){ + unaryOperand = unary; + function=operator; + execute(unaryOperand.length()); + } + + public void sqrt(float[] unary){ + unary(FUNC_SQRT, unary); + } + + private void unary(int operator){ + System.array.copy(result, 0, unaryOperand, 0, result.length); + function=operator; + execute(unaryOperand.length()); + } + + public void sqrt(){ + unary(FUNC_SQRT); + } + + } + + VectorKernel vk = new VectorKernel(SIZE); + vk.add(copyLhs, copyRhs); // copies args to lhs and rhs operands + // sets function type + // and executes kernel + vk.sqrt(); // because we have no arg + // copies result to unary operand + // sets function type + // execute 
kernel + +However there is one more objection to this approach, namely that by default it will force unnecessary buffer copies. + +When the bytecode for the above Kernel.run() method is analyzed Aparapi finds bytecode reading from lhsOperand, rhsOperand and unaryOperand arrays/buffers. Obviously at this bytecode analysis stage we can't predict which 'function type' will be used, so on every execution (Kernel.run()) Aparapi must copy all three buffers to the GPU. For binary operations this is one buffer copy wasted (the unaryOperand); for the unary operations we copy two buffers unnecessarily (lhsOperand and rhsOperand). We can of course use explicit buffer management to help us reduce these costs. Ideally we add this to our helper methods. + + class VectorKernel extends Kernel{ + float[] lhsOperand; + float[] rhsOperand; + float[] unaryOperand; + float[] result; + final static int FUNC_ADD =0; + final static int FUNC_SUB =1; + final static int FUNC_SQR =2; + final static int FUNC_SQRT =3; + // other functions + int function; + @Override public void run(){ + int gid = getGlobalId(0){ + if (function==FUNC_ADD){ + result[gid]=lhsOperand[gid]+rhsOperand[gid]; + }else if (function==FUNC_SUB){ + result[gid]=lhsOperand[gid]-rhsOperand[gid]; + }else if (function==FUNC_SQR){ + result[gid]=unaryOperand[gid]*unaryOperand[gid]; + }else if (function==FUNC_ADD){ + result[gid]=sqrt(unaryOperand[gid]); + }else if ....
+ } + private void binary(int operator, float[] lhs, float[] rhs){ + lhsOperand = lhs; + rhsOperand = rhs; + function=operator; + put(lhsOperand).put(rhsOperand); + execute(lhs.length()); + get(result); + } + public void add(float[] lhs, float[] rhs){ + binary(FUNC_ADD, lhs, rhs); + } + + public void sub(float[] lhs, float[] rhs){ + binary(FUNC_SUB, lhs, rhs); + } + + private void binary(int operator, float[] rhs){ + System.arrayCopy(result, 0, lhsOperand, result.length); + rhsOperand = rhs; + function=operator; + put(lhsOperand).put(rhsOperand); + execute(lhsOperand.legth()); + get(result); + } + + public void add(float[] rhs){ + binary(FUNC_ADD, rhs); + } + + public void sub( float[] rhs){ + binary(FUNC_SUB, rhs); + } + + private void unary(int operator, float[] unary){ + unaryOperand = unary; + function=operator; + put(unaryOperand); + execute(unaryOperand.length()); + get(result); + } + + public void sqrt(float[] unary){ + unary(FUNC_SQRT, unary); + } + + private void unary(int operator){ + System.array.copy(result, 0, unaryOperand, 0, result.length); + function=operator; + put(unaryOperand); + execute(unaryOperand.length()); + get(result); + + } + + public void sqrt(){ + unary(FUNC_SQRT); + } + + } + diff --git a/doc/MultipleEntryPointSupportProposal.md b/doc/MultipleEntryPointSupportProposal.md new file mode 100644 index 00000000..bf2d7056 --- /dev/null +++ b/doc/MultipleEntryPointSupportProposal.md @@ -0,0 +1,377 @@ +#MultipleEntryPointSupportProposal +*How to extend Aparapi to allow multiple entrypoints for kernels Updated Jul 30, 2012 by frost.g...@gmail.com* + +##The Current Single Entrypoint World + +At present Aparapi allows us to dispatch execution to a single 'single entry point' in a Kernel. Essentially for each Kernel only the overridden Kernel.run() method can be used to initiate execution on the GPU. + +Our canonical example is the 'Squarer' Kernel which allows us to create squares for each element in an input array in an output array. 
+ + Kernel squarer = new Kernel(){ + @Overide public void run(){ + int id = getGlobalId(0); + out[id] = in[id] * in[id]; + } + }; + +If we wanted a vector addition Kernel we would have to create a whole new Kernel. + + Kernel adder = new Kernel(){ + @Overide public void run(){ + int id = getGlobalId(0); + out[id] = in[id] * in[id]; + } + }; + +For us to square and then add a constant we would have to invoke two kernels. Or of course create a single SquarerAdder kernel. + +See the [EmulatingMultipleEntrypointsUsingCurrentAPI](EmulatingMultipleEntrypointsUsingCurrentAPI.md) page for ideas on how to emulate having multiple methods by passing data to a single run() method. + +##Why can't Aparapi just allow 'arbitrary' methods + +Ideally we would just expose a more natural API, one which allows us to provide specific methods for each arithmetic operation. + +Essentially + + class VectorKernel extends Kernel{ + public void add(); + public void sub(); + public void sqr(); + public void sqrt(); + } + +Unfortunately this is hard to implement using Aparapi. There are two distinct problems, both at runtime. + + How will Aparapi know which of the available methods we want to execute when we call Kernel.execute(range)? + On first execution how does Aparapi determine which methods might be entrypoints and therefore need to be converted to OpenCL? + +The first problem can be solved by extending Kernel.execute() to accept a method name + + kernel.execute(SIZE, "add"); + +This is the obvious solution, but it really causes maintenance issues in that it trades compile-time error reporting for runtime errors. If a developer mistypes the name of the method: + + kernel.execute(SIZE, "sadd"); // there is no such method + +The code will compile perfectly; only at runtime will we detect that there is no such method. +##An aside + +Maybe the new Java 8 method reference feature might help here.
In the paper below Brian Goetz talks about a double-colon syntax (Class::Method) for directly referencing a method which is presumably checked at compile time. + +So presumably + + kernel.execute(SIZE, VectorKernel::add); + +Would compile just fine, whereas + + kernel.execute(SIZE, VectorKernel::sadd); + +Would yield a compile-time error. + +See Brian Goetz's excellent Lambda documentation +##Back from the aside + +The second problem (knowing which methods need to be converted to OpenCL) can probably be solved using an Annotation. + + class VectorKernel extends Kernel{ + @EntryPoint public void add(); + @EntryPoint public void sub(); + @EntryPoint public void sqr(); + @EntryPoint public void sqrt(); + public void nonOpenCLMethod(); + } + +Here the @EntryPoint annotation allows the Aparapi runtime to determine which methods need to be exposed. +#My Extension Proposal + +Here is my proposal. Not only does it allow us to reference multiple entrypoints, but I think it actually improves the single entrypoint API, albeit at the cost of being more verbose. +##The developer must provide an API interface + +First I propose that we should ask the developer to provide an interface for all methods that we wish to execute on the GPU (or convert to OpenCL). + + interface VectorAPI extends AparapiAPI { + public void add(Range range); + public void sub(Range range); + public void sqrt(Range range); + public void sqr(Range range); + } + +Note that each API takes a Range; this will make more sense in a moment. +##The developer provides a bound implementation + +Aparapi should provide a mechanism for mapping the proposed API to its implementation. + +Note the weasel words here; this is not a conventional implementation of an interface. We will use an annotation (@Implements(Class class)) to provide the binding.
+ + @Implements(VectorAPI.class) class Vector extends Kernel { + public void add(RangeId rangeId){/*implementation here */} + public void sub(RangeId rangeId){/*implementation here */} + public void sqrt(RangeId rangeId){/*implementation here */} + public void sqr(RangeId rangeId){/*implementation here */} + public void public void nonOpenCLMethod(); + } + +##Why can't the implementation just implement the interface? + +This would be ideal. Sadly we need to intercept a call to, say, VectorAPI.add(Range) and dispatch to the resulting Vector.add(RangeId) instances. If you look at the signatures, the interface accepts a Range as its arg (the range over which we intend to execute) whereas the implementation (either called by JTP threads or GPU OpenCL dispatch) receives a RangeId (containing the unique globalId, localId, etc. fields). At the very end of this page I show a strawman implementation of a sequential loop implementation. +##So how do we get an implementation of VectorAPI + +We instantiate our Kernel by creating an instance using new. We then ask this instance to create an API instance. Some java.lang.reflect.Proxy trickery will presumably create an implementation of the actual instance, backed by the Java implementation. + +So execution would look something like: + + Vector kernel = new Vector(); + VectorAPI kernelApi = kernel.api(); + Range range = Range.create(SIZE); + kernalApi.add(range); + +So the Vector instance is a pure Java implementation. The extracted API is the bridge to the GPU.
+ +Of course then we can also execute using an inline call through api() + + Vector kernel = new Vector(); + Range range = Range.create(SIZE); + kernel.api().add(range); + kernel.api().sqrt(range); + +or even expose api as a public final field + + Vector kernel = new Vector(); + Range range = Range.create(SIZE); + kernel.api.add(range); + kernel.api.sqrt(range); + +##How would our canonical Squarer example look + + interface SquarerAPI extends AparapiAPI{ + void square(Range range); + } + + @Implements(SquarerAPI.class) class Squarer extends Kernel{ + int in[]; + int square[]; + public void square(RangeId rangeId){ + square[rangeId.gid] = in[rangeId.gid]*in[rangeId.gid]; + } + } + +Then we execute using + + Squarer squarer = new Squarer(); + // fill squarer.in[SIZE] + // create squarer.values[SIZE]; + +    squarer.api().square(Range.create(SIZE)); + +#Extending this proposal to allow argument passing + +Note that we have effectively replaced the use of the 'abstract' squarer.execute(range) with the more concrete squarer.api().square(range). + +Now I would like to propose that we take one more step by allowing us to pass arguments to our methods. + +Normally Aparapi captures buffer and field accesses to create the args that it passes to the generated OpenCL code. In our canonical squarer example the in[] and square[] buffers are captured from the bytecode and passed (behind the scenes) to the OpenCL. + +***TODO: Add generated OpenCL code to show what this looks like.*** + +However, by exposing the actual method we want to execute, we could also allow the API to accept parameters. 
+ +So our squarer example would go from + + interface SquarerAPI extends AparapiAPI{ + void square(Range range); + } + + @Implements(SquarerAPI.class) class Squarer extends Kernel{ + int in[]; + int square[]; + public void square(RangeId rangeId){ + square[rangeId.gid] = in[rangeId.gid]*in[rangeId.gid]; + } + } + + + Squarer squarer = new Squarer(); + // fill squarer.in[SIZE] + // create squarer.values[SIZE]; + + squarer.api().square(Range.create(SIZE)); + +to + + interface SquarerAPI extends AparapiAPI{ + void square(Range range, int[] in, int[] square); + } + + @Implements(SquarerAPI.class) class Squarer extends Kernel{ + public void square(RangeId rangeId, int[] in, int[] square){ + square[rangeId.gid] = in[rangeId.gid]*in[rangeId.gid]; + } + } + + + Squarer squarer = new Squarer(); + int[] in = // create and fill squarer.in[SIZE] + int[] square = // create squarer.values[SIZE]; + + squarer.api().square(Range.create(SIZE), in, square); + +I think that this makes Aparapi look more conventional. It also allows us to support overloading for the first time. 
+ + interface SquarerAPI extends AparapiAPI{ + void square(Range range, int[] in, int[] square); + void square(Range range, float[] in, float[] square); + } + + @Implements(SquarerAPI.class) class Squarer extends Kernel{ + public void square(RangeId rangeId, int[] in, int[] square){ + square[rangeId.gid] = in[rangeId.gid]*in[rangeId.gid]; + } + public void square(RangeId rangeId, float[] in, float[] square){ + square[rangeId.gid] = in[rangeId.gid]*in[rangeId.gid]; + } + } + + + Squarer squarer = new Squarer(); + int[] in = // create and fill squarer.in[SIZE] + int[] square = // create squarer.values[SIZE]; + + squarer.api().square(Range.create(SIZE), in, square); + float[] inf = // create and fill squarer.in[SIZE] + float[] squaref = // create squarer.values[SIZE]; + + squarer.api().square(Range.create(SIZE), inf, squaref); + +--- + +test harness + + import java.lang.reflect.InvocationHandler; + import java.lang.reflect.Method; + import java.lang.reflect.Proxy; + + + public class Ideal{ + + public static class OpenCLInvocationHandler<T> implements InvocationHandler { + Object instance; + OpenCLInvocationHandler(Object _instance){ + instance = _instance; + } + @Override public Object invoke(Object interfaceThis, Method interfaceMethod, Object[] interfaceArgs) throws Throwable { + Class clazz = instance.getClass(); + + Class[] argTypes = interfaceMethod.getParameterTypes(); + argTypes[0]=RangeId.class; + Method method = clazz.getDeclaredMethod(interfaceMethod.getName(), argTypes); + + + if (method == null){ + System.out.println("can't find method"); + }else{ + RangeId rangeId = new RangeId((Range)interfaceArgs[0]); + interfaceArgs[0]=rangeId; + for (rangeId.wgid = 0; rangeId.wgid <rangeId.r.width; rangeId.wgid++){ + method.invoke(instance, interfaceArgs); + } + } + + return null; + } + } + + static class Range{ + int width; + Range(int _width) { + width = _width; + } + } + + static class Range2D extends Range{ + int height; + + Range2D(int _width, int _height) { + super(_width); + 
height = _height; + } + } + + static class Range1DId<T extends Range>{ + Range1DId(T _r){ + r = _r; + } + T r; + + int wgid, wlid, wgsize, wlsize, wgroup; + } + + static class RangeId extends Range1DId<Range>{ + RangeId(Range r){ + super(r); + } + } + + static class Range2DId extends Range1DId<Range2D>{ + Range2DId(Range2D r){ + super(r); + } + + int hgid, hlid, hgsize, hlsize, hgroup; + } + + + + + + static <T> T create(Object _instance, Class<T> _interface) { + OpenCLInvocationHandler<T> invocationHandler = new OpenCLInvocationHandler<T>(_instance); + T instance = (T) Proxy.newProxyInstance(Ideal.class.getClassLoader(), new Class[] { + _interface, + + }, invocationHandler); + return (instance); + + } + + + + public static class Squarer{ + interface API { + public API foo(Range range, int[] in, int[] out); + public Squarer dispatch(); + + } + + public API foo(RangeId rangeId, int[] in, int[] out) { + out[rangeId.wgid] = in[rangeId.wgid]*in[rangeId.wgid]; + return(null); + } + } + + /** + * @param args + */ + public static void main(String[] args) { + + Squarer.API squarer = create(new Squarer(), Squarer.API.class); + int[] in = new int[] { + 1, + 2, + 3, + 4, + 5, + 6 + }; + int[] out = new int[in.length]; + Range range = new Range(in.length); + + squarer.foo(range, in, out); + + for (int s:out){ + System.out.println(s); + } + + } + + } + diff --git a/doc/README.md b/doc/README.md index 85db83cf..1891988a 100644 --- a/doc/README.md +++ b/doc/README.md @@ -20,9 +20,9 @@ APARAPI Documentation | [ChoosingSpecificDevicesForExecution](ChoosingSpecificDevicesForExecution.md) | Using the new Device API's to choose Kernel execution on a specific device. | | Gadgets | Gadgetorium| | [ConvertingBytecodeToOpenCL](ConvertingBytecodeToOpenCL.md) | How Aparapi converts bytecode to OpenCL | -| DevelopersGuideLinux | Developer guide for Linux. | -| DevelopersGuideWindows | Developers guide for Windows. 
| -| EmulatingMultipleEntrypointsUsingCurrentAPI | How to emulate multiple entrypoints using existing Aparapi APIs | +| [DevelopersGuideLinux](DevelopersGuideLinux.md) | Developer guide for Linux. | +| [DevelopersGuideWindows](DevelopersGuideWindows.md) | Developers guide for Windows. | +| [EmulatingMultipleEntrypointsUsingCurrentAPI](EmulatingMultipleEntrypointsUsingCurrentAPI.md) | How to emulate multiple entrypoints using existing Aparapi APIs | | MultipleEntryPointSupportProposal | How to extend Aparapi to allow multiple entrypoints for kernels | | [ExplicitBufferHandling](ExplicitBufferHandling.md) | How to minimize buffer transfers | | AparapiPatterns | Examples and code fragments to demonstrate Aparapi fetaures. | -- GitLab