diff --git a/.gitignore b/.gitignore index b44b95d6339932e40a637ca96666d85f39b9c11a..7ad7af935a7eb3bbade661b421e3f517afe2dd3d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,7 @@ hs_err_pid* **/include/ **/nbproject/ target/ + +# java files +*.class +*~ diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000000000000000000000000000000000000..5424de6f38496d849368db7ba8b950af31c34fc5 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,5 @@ + +Change log can be found at: +https://gitlab.com/mora/aparapi-ucores/wikis/ReleaseNotes + + diff --git a/LICENSE b/LICENSE index e06d2081865a766a8668acc12878f98b27fc9ea0..7f8889ba59bee4fa975f14b490c2c8e438c6aa38 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Apache License + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -198,5 +198,4 @@ Apache License distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. - + limitations under the License. 
\ No newline at end of file diff --git a/README.md b/README.md index 13b08636cf2dd9ac7671b11576610a79c3af5002..aa298eb492d459cc9fcf9b90c8cf90e52fcf8b3d 100644 --- a/README.md +++ b/README.md @@ -74,4 +74,4 @@ Kernel kernel = new Kernel() { Range range = Range.create(result.length); kernel.execute(range); -``` +``` \ No newline at end of file diff --git a/agent.sh b/agent.sh new file mode 100644 index 0000000000000000000000000000000000000000..731145f56c3bfd67f4f67c74260a0c4d34b08a88 --- /dev/null +++ b/agent.sh @@ -0,0 +1,10 @@ +java \ + -agentpath:../../com.amd.aparapi.jni/dist/libaparapi_x86_64.so\ + -Dcom.amd.aparapi.useAgent=true\ + -Djava.library.path=../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main diff --git a/docs/APARAPIInstallationNotes.txt b/docs/APARAPIInstallationNotes.txt new file mode 100644 index 0000000000000000000000000000000000000000..bda639ba9a52389e60c57774394fae0dbd4e3a3a --- /dev/null +++ b/docs/APARAPIInstallationNotes.txt @@ -0,0 +1,41 @@ +--------------------------------------------------------- +APARAPI Installation Notes for CentOS release 6.x (Tested on 6.3/4/5/6) +--------------------------------------------------------- + +* Note default CentOS repository versions of Java(1.6) and Ant(1.7) are not compatible with latest APARAPI svn src + need to download newer versions (see below) + +- install Java JDK + sudo yum install java-1.7.0-openjdk-devel + +- install ANT + Download latest ANT -> currently apache-ant-1.9.1-bin.tar.gz + sudo tar xvzf apache-ant-1.9.1-bin.tar.gz -C /opt + sudo ln -s /opt/apache-ant-1.9.1 /opt/ant + +- install AMD APP SDK + + - needed for build even if hardware not available + +- install git + +- install g++ + + sudo yum install gcc-c++ + +- get src from git repository + + git clone
... [project folder] + +- set environment vars + + source [project folder]/env/aparapiBuildEnv + +- build + + cd [project folder]/src/aparapi + + ant clean build dist + + + diff --git a/docs/AparapiUcoresBinaryFlow.txt b/docs/AparapiUcoresBinaryFlow.txt new file mode 100644 index 0000000000000000000000000000000000000000..f40b79c35ca512d177ec3a09feba516faf6269f7 --- /dev/null +++ b/docs/AparapiUcoresBinaryFlow.txt @@ -0,0 +1,34 @@ +Aparapi Ucores Binary Flow +-------------------------- + +This is currently used only for FPGAs but we plan to expand it for general manual optimization/compilation of OpenCL source code. + +A Binary flow is one where the Aparapi framework generates an OpenCL file while running and then tries to read and execute the binary compilation of that source file. + +Sequence of events +------------------ + +1. Aparapi Application is executed. Aparapi automatically generates an OpenCL source file describing the kernel (file name is the full java kernel class name.cl) + +2. Aparapi Application automatically tries to load the binary compiled file (full java kernel class name.aocx) + +3. If Aparapi Application finds the compiled file it executes the compiled kernel (runs the kernel on the accelerator) else it will revert to JTP mode in which case the application runs as specified without acceleration (JTP mode). A simulation mode if you will. + +How this works for FPGAs +------------------------ + +1. We first run the Application in CPU/GPU/ACC mode. Aparapi-Ucores will generate an OpenCL file (ClassXXXName.cl), but will not find the equivalent binary file (ClassXXXName.aocx). It will revert to another execution mode such as JTP. We can check whether the kernel is logically correct using JTP. + +2. We feed the OpenCL file (ClassXXXName.cl) to an FPGA OpenCL compiler. + +Example: + + a. aoc ClassXXXName.cl -O3 --board pcie385n_a7 -v --report + + b. Wait until an optimized FPGA design is created...currently takes several hours + + c.
Output of the compiler will be: ClassXXXName.aocx + + 3. Copy the binary output file (ClassXXXName.aocx) to the folder where the .cl file resides. + +4. Run application again in ACC/GPU mode. This time Aparapi will detect the ClassXXXName.aocx and use the FPGA as the accelerator. \ No newline at end of file diff --git a/docs/HowToBuildAPARAPI.txt b/docs/HowToBuildAPARAPI.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb332e064edcf02eee2cd4d91953bb7e7d20e6a6 --- /dev/null +++ b/docs/HowToBuildAPARAPI.txt @@ -0,0 +1,63 @@ + +--------------------------------------------------------- +How to build APARAPI from the cmd line after modifications to code +--------------------------------------------------------- +* first read APARAPI Installation Notes (APARAPIInstallationNotes.txt) for general instructions on getting the project setup and initial build steps + +- open new terminal window + +- set environment variables + + source [project folder]/env/AlteraV14Env + +- to build the APARAPI src tree + + cd [project folder]/src/aparapi + + ant clean build dist + + * this does not build the jni proxy with FPGA support automatically (see below for jni FPGA build) + +- to build the APARAPI jni proxy library (should run after initial build and after any changes to CPP source files) + + - go to jni dir + + cd [project folder]/src/aparapi/com.amd.aparapi.jni/ + + - to use ant to build standard jni version * + + ant + + - to use ant to build FPGA jni version * + + ant -f build_altera_ocl.xml + +* The output of the com.amd.aparapi.jni build is a dll stored in dist folder: + + [project folder]/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so + +If you have a system where you want to have both FPGA and standard versions available (i.e.
with multi OpenCL hardware platforms) you can simply copy the dll's to different locations after the build and link to them through the java vm command line parameters: + +Example: + +Copy the fpga and standard dll's to the following locations respectively: +/src/aparapi/com.amd.aparapi.jni/dist.fpga/libaparapi_x86_64.so +/src/aparapi/com.amd.aparapi.jni/dist.std/libaparapi_x86_64.so + +To select one JNI version or the other at execution time, use the following parameters to the java command: +java -Djava.library.path=../../com.amd.aparapi.jni/dist.std [rest of cmd line] +java -Djava.library.path=../../com.amd.aparapi.jni/dist.fpga [rest of cmd line] + + + + + + + + + + + + + + diff --git a/docs/HowToRunAPARAPI.txt b/docs/HowToRunAPARAPI.txt new file mode 100644 index 0000000000000000000000000000000000000000..2506b0e2cfd0ffd936a400c7cfdc0880ac6e4f91 --- /dev/null +++ b/docs/HowToRunAPARAPI.txt @@ -0,0 +1,62 @@ + +--------------------------------------------------------- +How to run APARAPI tests from the cmd line +--------------------------------------------------------- + +- open new terminal window + +- set environment variables + + source [project folder]/env/AlteraV14Env + +- go to either samples/examples + + - samples dir -> + + cd [project folder]/src/aparapi/samples + + - examples dir-> + + cd [project folder]/src/aparapi/examples/ + +- to run any sample or example: + + - run the shell script file named after the sample/example name and specify type of run + + Format is*: + + sh [name].sh [ACC|CPU|JTP|GPU|SEQ] + + Examples: + + - run nbody simulation + cd [project folder]/src/aparapi/examples/nbody + sh nbody.sh JTP + sh nbody.sh CPU + sh nbody.sh GPU + sh nbody.sh ACC + + - run mandel + cd [project folder]/src/aparapi/samples/mandel/ + sh mandel.sh JTP + sh mandel.sh CPU + sh mandel.sh GPU + sh mandel.sh ACC + + +* For more sophisticated use cases with dual configs(fpga/std) you can use the following format: + + sh [name].std.sh [ACC|CPU|JTP|GPU|SEQ] + +
sh [name].fpga.sh [ACC|CPU|JTP|GPU|SEQ] + +You can read more about dual config in HowToBuildAPARAPI.txt. + + + + + + + + + diff --git a/docs/WorkingWithMultiplePlatforms.txt b/docs/WorkingWithMultiplePlatforms.txt new file mode 100644 index 0000000000000000000000000000000000000000..d55a415aeaea67f57a751ad005cd24b3e0e95b42 --- /dev/null +++ b/docs/WorkingWithMultiplePlatforms.txt @@ -0,0 +1,46 @@ +Working with multiple platforms +------------------------------- + +The original version of Aparapi does not support platform selection (it just chooses the first available one). + +This means that in scenarios where you have multiple OpenCL platforms such as AMD, NVidia, Intel, Altera etc. you do not have control over what accelerator device will be used. + +One of the changes we made is to allow selection for platforms that support the OpenCL ICD model. +(https://www.khronos.org/news/permalink/opencl-installable-client-driver-icd-loader) + +In the spirit of other Aparapi configuration options for device selection this can be set using a configuration option. + +The platform configuration option is called: com.amd.aparapi.platformHint + +When this option is set, Aparapi searches for the platformHint string inside the available platform names; if a match is found, that platform is selected over the other available ones. + +You can use it on the command line in the following way: + +java -Dcom.amd.aparapi.platformHint=AMD .... + +or + +java -Dcom.amd.aparapi.platformHint=NVIDIA ....
+ + +Nbody simulation example with platform selection +--------------------------------------------------------------------- + +The script nbody.std.no-opengles-platform-select.sh contains the following java command line: + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dcom.amd.aparapi.platformHint=$3 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + +To choose an AMD GPU for example we would invoke the script in the following way(the third parameter): + +nbody.std.no-opengles-platform-select.sh GPU 1024 AMD + + diff --git a/env/AlteraV14Env b/env/AlteraV14Env new file mode 100644 index 0000000000000000000000000000000000000000..501d6a4db7ce3a70915e84fa7f29ab27ec293c46 --- /dev/null +++ b/env/AlteraV14Env @@ -0,0 +1,8 @@ +export QUARTUS_ROOTDIR=/home/sdev/altera/14.0/quartus +export PATH=$PATH:$QUARTUS_ROOTDIR/bin +export ALTERAOCLSDKROOT=/home/sdev/altera/14.0/hld +export LD_LIBRARY_PATH=$ALTERAOCLSDKROOT/host/linux64/lib:$ALTERAOCLSDKROOT/board/nalla_pcie/linux64/lib +export PATH=$PATH:$ALTERAOCLSDKROOT/bin +export AOCL_BOARD_PACKAGE_ROOT=/home/sdev/altera/14.0/hld/board/nalla_pcie +export LM_LICENSE_FILE=[your license file here] + diff --git a/env/aparapiBuildEnv b/env/aparapiBuildEnv new file mode 100644 index 0000000000000000000000000000000000000000..20ee4815b70bcc95ff781ebd58d1bb2d4d5665f5 --- /dev/null +++ b/env/aparapiBuildEnv @@ -0,0 +1,6 @@ +# Ant build environment +export ANT_HOME=/opt/ant +export PATH=${PATH}:${ANT_HOME}/bin + + + diff --git a/env/mahoutEnv b/env/mahoutEnv new file mode 100644 index 0000000000000000000000000000000000000000..14a7683aada0538e3532d41b957b48b1586f74ee --- /dev/null +++ b/env/mahoutEnv @@ -0,0 +1,10 @@ +# Generated file for hadoop/mahout env 
+export JAVA_HOME=/usr/lib/jvm/jre-1.7.0-openjdk.x86_64/ +#export HADOOP_HOME=/usr/local/hadoop-1.2.0/ +export HADOOP_DIR=/usr/local/hadoop-1.2.0 +export HADOOP_PREFIX=$HADOOP_DIR/ +export HADOOP_CONF_DIR=$HADOOP_DIR/conf +export MAHOUT_HOME=/usr/local/mahout-0.9/ +export MAHOUT_VERSION=0.9-SNAPSHOT +#export MAVEN_OPTS=-Xmx1024m + diff --git a/examples/correlation-matrix/.project b/examples/correlation-matrix/.project deleted file mode 100644 index 89d60ffcfa30f47bda1bf65bbd588e765ffe5786..0000000000000000000000000000000000000000 --- a/examples/correlation-matrix/.project +++ /dev/null @@ -1,17 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<projectDescription> - <name>correlation-matrix</name> - <comment></comment> - <projects> - </projects> - <buildSpec> - <buildCommand> - <name>org.eclipse.jdt.core.javabuilder</name> - <arguments> - </arguments> - </buildCommand> - </buildSpec> - <natures> - <nature>org.eclipse.jdt.core.javanature</nature> - </natures> -</projectDescription> diff --git a/local.bat b/local.bat new file mode 100644 index 0000000000000000000000000000000000000000..c89fd6ddf53fabd74317726a59f16f54c37e033c --- /dev/null +++ b/local.bat @@ -0,0 +1,15 @@ +@echo off + +java ^ + -Djava.library.path=..\..\com.amd.aparapi.jni\dist;..\third-party\jogamp ^ + -Dcom.amd.aparapi.executionMode=%1 ^ + -Dcom.amd.aparapi.enableShowGeneratedOpenCL=true ^ + -Dcom.amd.aparapi.enableVerboseJNI=false ^ + -Dcom.amd.aparapi.enableProfiling=true ^ + -Dbodies=%2 ^ + -Dheight=600 ^ + -Dwidth=600 ^ + -classpath ..\third-party\jogamp\gluegen-rt.jar;..\third-party\jogamp\jogl.all.jar;..\..\com.amd.aparapi\dist\aparapi.jar;nbody.jar ^ + com.amd.aparapi.examples.nbody.Local + + diff --git a/nbody-agent.bat b/nbody-agent.bat new file mode 100644 index 0000000000000000000000000000000000000000..2dfdc11e478ed846235864a6a6e25d683a345e37 --- /dev/null +++ b/nbody-agent.bat @@ -0,0 +1,16 @@ +@echo off + +java ^ + -agentpath:../../com.amd.aparapi.jni/dist/aparapi_x86_64.dll ^ +
-Dcom.amd.aparapi.useAgent=true ^ + -Djava.library.path=..\third-party\jogamp ^ + -Dcom.amd.aparapi.executionMode=%1 ^ + -Dcom.amd.aparapi.enableProfiling=false ^ + -Dcom.amd.aparapi.enableShowGeneratedOpenCL=true ^ + -Dbodies=%2 ^ + -Dheight=600 ^ + -Dwidth=600 ^ + -classpath ..\third-party\jogamp\gluegen-rt.jar;..\third-party\jogamp\jogl-all.jar;..\..\com.amd.aparapi\dist\aparapi.jar;nbody.jar ^ + com.amd.aparapi.examples.nbody.Main + + diff --git a/nbody.bat b/nbody.bat new file mode 100644 index 0000000000000000000000000000000000000000..b6e01554fcefe4bb0adff2da6be94ddb50d53434 --- /dev/null +++ b/nbody.bat @@ -0,0 +1,15 @@ +@echo off + +java ^ + -Djava.library.path=..\..\com.amd.aparapi.jni\dist;..\third-party\jogamp ^ + -Dcom.amd.aparapi.executionMode=%1 ^ + -Dcom.amd.aparapi.enableProfiling=false ^ + -Dcom.amd.aparapi.enableShowGeneratedOpenCL=true ^ + -Dcom.amd.aparapi.logLevel=SEVERE ^ + -Dbodies=%2 ^ + -Dheight=600 ^ + -Dwidth=600 ^ + -classpath ..\third-party\jogamp\gluegen-rt.jar;..\third-party\jogamp\jogl-all.jar;..\..\com.amd.aparapi\dist\aparapi.jar;nbody.jar ^ + com.amd.aparapi.examples.nbody.Main + + diff --git a/nbody.fpga.no-opengles.sh b/nbody.fpga.no-opengles.sh new file mode 100644 index 0000000000000000000000000000000000000000..bfd669b05f96bdbc666540e3a1ef1869360d0878 --- /dev/null +++ b/nbody.fpga.no-opengles.sh @@ -0,0 +1,11 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.fpga:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.fpga.sh b/nbody.fpga.sh new file mode 100644 index 0000000000000000000000000000000000000000..15dd1cae5760c24b8e177192be6bed4013254154 --- /dev/null +++ b/nbody.fpga.sh @@ -0,0 +1,10 @@ + +java \ + 
-Djava.library.path=../../com.amd.aparapi.jni/dist.fpga:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.nv.profile.sh b/nbody.nv.profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..70ed062dec99b4fec3dfe4127608d9add89aadb0 --- /dev/null +++ b/nbody.nv.profile.sh @@ -0,0 +1,12 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dcom.amd.aparapi.enableProfiling=true \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.nv.sh b/nbody.nv.sh new file mode 100644 index 0000000000000000000000000000000000000000..bcecff31affe03a426e6564c697165096aeabede --- /dev/null +++ b/nbody.nv.sh @@ -0,0 +1,11 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.sh b/nbody.sh new file mode 100644 index 0000000000000000000000000000000000000000..75289f181d975eb550465dc88e31f35c35b77e59 --- /dev/null +++ b/nbody.sh @@ -0,0 +1,10 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -classpath 
../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.no-opengles-platform-select-profile.sh b/nbody.std.no-opengles-platform-select-profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..665375ed179548549a057ac1338bdc01cd4e3d8c --- /dev/null +++ b/nbody.std.no-opengles-platform-select-profile.sh @@ -0,0 +1,13 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dcom.amd.aparapi.platformHint=$3 \ + -Dcom.amd.aparapi.enableProfiling=true \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.no-opengles-platform-select.sh b/nbody.std.no-opengles-platform-select.sh new file mode 100644 index 0000000000000000000000000000000000000000..f663e32014cf60ddfccaef877130c4eb3bcb75f9 --- /dev/null +++ b/nbody.std.no-opengles-platform-select.sh @@ -0,0 +1,12 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dcom.amd.aparapi.platformHint=$3 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.no-opengles.profile+csv.sh b/nbody.std.no-opengles.profile+csv.sh new file mode 100644 index 0000000000000000000000000000000000000000..2089d7e656a441ff83f337c5e31abc24b048fd7a --- /dev/null +++ b/nbody.std.no-opengles.profile+csv.sh @@ -0,0 +1,16 @@ + +java \ + 
-Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dcom.amd.aparapi.enableShowGeneratedOpenCL=true \ + -Dcom.amd.aparapi.enableVerboseJNI=false \ + -Dcom.amd.aparapi.enableProfilingCSV=true \ + -Dcom.amd.aparapi.enableProfiling=true \ + -Dcom.amd.aparapi.profilingFileNameFormatStr=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.no-opengles.profile.sh b/nbody.std.no-opengles.profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..70ed062dec99b4fec3dfe4127608d9add89aadb0 --- /dev/null +++ b/nbody.std.no-opengles.profile.sh @@ -0,0 +1,12 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dcom.amd.aparapi.enableProfiling=true \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.no-opengles.sh b/nbody.std.no-opengles.sh new file mode 100644 index 0000000000000000000000000000000000000000..bcecff31affe03a426e6564c697165096aeabede --- /dev/null +++ b/nbody.std.no-opengles.sh @@ -0,0 +1,11 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -Djogl.disable.opengles \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.profile.sh b/nbody.std.profile.sh new file mode 100644 
index 0000000000000000000000000000000000000000..57d1c076514e0e18590af94a5318fd970f8452f2 --- /dev/null +++ b/nbody.std.profile.sh @@ -0,0 +1,11 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dcom.amd.aparapi.enableProfiling=true \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/nbody.std.sh b/nbody.std.sh new file mode 100644 index 0000000000000000000000000000000000000000..e32ab3c35f670da1b921fdcc3fc174c2bf4d08e8 --- /dev/null +++ b/nbody.std.sh @@ -0,0 +1,10 @@ + +java \ + -Djava.library.path=../../com.amd.aparapi.jni/dist.std:../third-party/jogamp \ + -Dcom.amd.aparapi.executionMode=$1 \ + -Dbodies=$2 \ + -Dheight=600 \ + -Dwidth=600 \ + -classpath ../third-party/jogamp/jogl-all.jar:../third-party/jogamp/gluegen-rt.jar:../../com.amd.aparapi/dist/aparapi.jar:nbody.jar \ + com.amd.aparapi.examples.nbody.Main + diff --git a/pom.xml b/pom.xml index 08ab2b99a183a0a8e275167be2b8c2171a376fc6..c67e9c91559f02cbf7738be415605b5650f87573 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ <groupId>com.aparapi</groupId> <artifactId>aparapi-examples</artifactId> - <version>1.2.1-SNAPSHOT</version> + <version>2.0.0-SNAPSHOT</version> <packaging>jar</packaging> <properties> @@ -81,7 +81,7 @@ <dependency> <groupId>com.aparapi</groupId> <artifactId>aparapi</artifactId> - <version>1.2.0</version> + <version>2.0.0-SNAPSHOT</version> </dependency> <dependency> <groupId>org.jogamp.jogl</groupId> diff --git a/src/main/java/com/aparapi/examples/add/Main.java b/src/main/java/com/aparapi/examples/add/Main.java index 97a4c7e0d8e4aa459da55382b0c0aa4cc3c50b76..a2b8372dd9d8f2cd88bb3b5fa1fd5ab162e0e1a7 100644 --- a/src/main/java/com/aparapi/examples/add/Main.java +++ b/src/main/java/com/aparapi/examples/add/Main.java 
@@ -48,6 +48,11 @@ under those regulations, please refer to the U.S. Bureau of Industry and Securit package com.aparapi.examples.add; +import com.aparapi.Kernel; +import com.aparapi.Range; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + import com.aparapi.Kernel; import com.aparapi.Range; @@ -55,7 +60,7 @@ public class Main{ public static void main(String[] _args) { - final int size = 512; + final int size = 30*1000*1000; final float[] a = new float[size]; final float[] b = new float[size]; @@ -74,9 +79,39 @@ public class Main{ } }; + // !!! oren -> for JNI debug +// try { +// System.out.printf("Press any key..."); +// System.in.read(); +// } catch (IOException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } + + // !!! oren -> add time measurement + + System.out.printf("Running kernel.."); + + long startTime = System.nanoTime(); + kernel.execute(Range.create(size)); - for (int i = 0; i < size; i++) { + long elapsedTimeNano = System.nanoTime() - startTime; + + long elapsedTimeSec = TimeUnit.SECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS); + + long elapsedTimeMilli = TimeUnit.MILLISECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS); + + System.out.printf("****************\n"); + System.out.printf("Elapsed time in milli: %d\n",elapsedTimeMilli); + System.out.printf("Elapsed time in sec : %d\n",elapsedTimeSec); + System.out.printf("****************\n"); + + // !!! oren change -> show first 10 only + //for (int i = 0; i < size; i++) { + int displayRange = (size > 20) ? 
20 : size; + System.out.printf("**************** Showing first %d results ****************\n",displayRange); + for (int i = 0; i < displayRange; i++) { System.out.printf("%6.2f + %6.2f = %8.2f\n", a[i], b[i], sum[i]); } diff --git a/src/main/java/com/aparapi/examples/add/MainSelectPlatform.java b/src/main/java/com/aparapi/examples/add/MainSelectPlatform.java new file mode 100644 index 0000000000000000000000000000000000000000..fc0b90e726168e91d24d9d31f2d056e455baed70 --- /dev/null +++ b/src/main/java/com/aparapi/examples/add/MainSelectPlatform.java @@ -0,0 +1,263 @@ +/* + +Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + +All rights reserved. + + + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + +following conditions are met: + + + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following + +disclaimer. + + + +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + +disclaimer in the documentation and/or other materials provided with the distribution. + + + +Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + +derived from this software without specific prior written permission. + + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + +laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through + +774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, + +you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of + +Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration + +Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1, + +E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups + +D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject + +to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774 + +of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations + +under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/. 
+ + + +*/ + +package com.aparapi.examples.add; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import com.aparapi.Kernel; +import com.aparapi.Range; +import com.aparapi.device.Device; +import com.aparapi.device.OpenCLDevice; +import com.aparapi.internal.opencl.OpenCLPlatform; +import java.util.List; + +public class MainSelectPlatform { + + public static void listPlatformsAndDevices() + { + List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms(); + + int platformc = 0; + for (OpenCLPlatform platform : platforms) + { + + System.out.println("Platform " + platformc + "{"); + + System.out.println(" Name : \"" + platform.getName() + "\""); + + System.out.println(" Vendor : \"" + platform.getVendor() + "\""); + + System.out.println(" Version : \"" + platform.getVersion() + "\""); + + List<OpenCLDevice> devices = platform.getOpenCLDevices(); + + System.out.println(" Platform contains " + devices.size() + " OpenCL devices"); + + int devicec = 0; + + for (OpenCLDevice device : devices) + { + System.out.println(" Device " + devicec + "{"); + + System.out.println(" Type : " + device.getType()); + + System.out.println(" GlobalMemSize : " + device.getGlobalMemSize()); + + System.out.println(" LocalMemSize : " + device.getLocalMemSize()); + + System.out.println(" MaxComputeUnits : " + device.getMaxComputeUnits()); + + System.out.println(" MaxWorkGroupSizes : " + device.getMaxWorkGroupSize()); + + System.out.println(" MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions()); + + System.out.println(" }"); + + devicec++; + } + + // close platform bracket + System.out.println("}"); + + platformc++; + } + } + + public static void main(String[] args) { + + + + final int size = 1000*1000; + + + + final float[] a = new float[size]; + + final float[] b = new float[size]; + + + + for (int i = 0; i < size; i++) { + + a[i] = (float) (Math.random() * 100); + + b[i] = (float) (Math.random() * 100); + + } + + + + final float[] sum = new 
float[size]; + + + + Kernel kernel = new Kernel(){ + + @Override public void run() { + + int gid = getGlobalId(); + + sum[gid] = a[gid] + b[gid]; + + } + + }; + + + + + + // !!! oren -> add time measurement + + System.out.printf("Running kernel.."); + + + + long startTime = System.nanoTime(); + + + // !!! experiment with platform/device selection + System.out.printf("**** listPlatformsAndDevices ****\n"); + listPlatformsAndDevices(); + System.out.printf("****************\n"); + if(args.length<2) + { + System.out.printf("****************\n"); + System.out.printf("Usage is: select platformHint deviceType\n"); + System.out.printf("****************\n"); + return; + } + + String platformHint = args[0]; + String deviceType = args[1]; + int deviceId = (args.length>2) ? Integer.parseInt(args[2]) : 0; + String flowTypeStr = (args.length>3) ? args[3] : null; + if(flowTypeStr!=null) + kernel.setFlowType(flowTypeStr); + System.out.printf("**** getDevice ****\n"); + Device device = Device.getDevice(platformHint,deviceType,deviceId); + kernel.execute(Range.create(device,512,16)); + System.out.printf("****************\n"); + + // test new range functionality + Range.create(device,Range.create(512,16)); + + + + long elapsedTimeNano = System.nanoTime() - startTime; + + + + long elapsedTimeSec = TimeUnit.SECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS); + + + + long elapsedTimeMilli = TimeUnit.MILLISECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS); + + + + System.out.printf("****************\n"); + + System.out.printf("Elapsed time in milli: %d\n",elapsedTimeMilli); + + System.out.printf("Elapsed time in sec : %d\n",elapsedTimeSec); + + System.out.printf("****************\n"); + + + + // !!! oren change -> show first 10 only + + //for (int i = 0; i < size; i++) { + + int displayRange = (size > 20) ? 
20 : size; + + System.out.printf("**************** Showing first %d results ****************\n",displayRange); + + for (int i = 0; i < displayRange; i++) { + + System.out.printf("%6.2f + %6.2f = %8.2f\n", a[i], b[i], sum[i]); + + } + + + + kernel.dispose(); + + } + + + +} diff --git a/src/main/java/com/aparapi/examples/add/UnsafeAccess.java b/src/main/java/com/aparapi/examples/add/UnsafeAccess.java new file mode 100644 index 0000000000000000000000000000000000000000..db5100cef9da256ec7ca8746f1906bf8842e440f --- /dev/null +++ b/src/main/java/com/aparapi/examples/add/UnsafeAccess.java @@ -0,0 +1,24 @@ +package com.aparapi.examples.add; + +import java.lang.reflect.Field; + +import sun.misc.Unsafe; + +public class UnsafeAccess { + public static final Unsafe unsafe; + static { + try { + // This is a bit of voodoo to force the unsafe object into + // visibility and acquire it. + // This is not playing nice, but as an established back door it is + // not likely to be + // taken away. + Field field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + unsafe = (Unsafe) field.get(null); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + +} diff --git a/src/main/java/com/aparapi/examples/add/UnsafeDirectByteBuffer.java b/src/main/java/com/aparapi/examples/add/UnsafeDirectByteBuffer.java new file mode 100644 index 0000000000000000000000000000000000000000..677191a9676576df80418ea0ee19df3120f32168 --- /dev/null +++ b/src/main/java/com/aparapi/examples/add/UnsafeDirectByteBuffer.java @@ -0,0 +1,114 @@ +package com.aparapi.examples.add; + +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.FloatBuffer; + +public class UnsafeDirectByteBuffer { + + /** The size of a short primitive type, in bytes. */ + public static final int SIZEOF_SHORT = 2; + /** The size of a int primitive type, in bytes. */ + public static final int SIZEOF_INT = 4; + /** The size of a float primitive type, in bytes. 
*/ + public static final int SIZEOF_FLOAT = 4; + /** The size of a double primitive type, in bytes. */ + public static final int SIZEOF_DOUBLE = 8; + /** The size of a char primitive type, in bytes. */ + public static final int SIZEOF_CHAR = 2; + + + private static final long addressOffset; + public static final int CACHE_LINE_SIZE = 64; + public static final int PAGE_SIZE = UnsafeAccess.unsafe.pageSize(); + static { + try { + addressOffset = UnsafeAccess.unsafe.objectFieldOffset(Buffer.class + .getDeclaredField("address")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public static long getAddress(ByteBuffer buffy) { + return UnsafeAccess.unsafe.getLong(buffy, addressOffset); + } + + /** + * put byte and skip position update and boundary checks + * + * @param buffy + * @param b + */ + public static void putByte(long address, int position, byte b) { + UnsafeAccess.unsafe.putByte(address + (position << 0), b); + } + + public static void putByte(long address, byte b) { + UnsafeAccess.unsafe.putByte(address, b); + } + + public static ByteBuffer allocateAlignedByteBuffer(int capacity, long align) { + if (Long.bitCount(align) != 1) { + throw new IllegalArgumentException("Alignment must be a power of 2"); + } + // We over allocate by the alignment so we know we can have a large + // enough aligned block of memory to use. + ByteBuffer buffy = ByteBuffer.allocateDirect((int) (capacity + align)); + long address = getAddress(buffy); + if ((address & (align - 1)) == 0) { + // limit to the capacity specified + buffy.limit(capacity); + // set order to native while we are here. 
+ ByteBuffer slice = buffy.slice().order(ByteOrder.nativeOrder()); + // the slice is now an aligned buffer of the required capacity + return slice; + } else { + int newPosition = (int) (align - (address & (align - 1))); + buffy.position(newPosition); + int newLimit = newPosition + capacity; + // limit to the capacity specified + buffy.limit(newLimit); + // set order to native while we are here. + ByteBuffer slice = buffy.slice().order(ByteOrder.nativeOrder()); + // the slice is now an aligned buffer of the required capacity + return slice; + } + } + + public static FloatBuffer allocateAlignedFloatBuffer(int capacity, long align) + { + return UnsafeDirectByteBuffer.allocateAlignedByteBuffer(SIZEOF_FLOAT*capacity,align).asFloatBuffer(); + } + + public static boolean isPageAligned(ByteBuffer buffy) { + return isPageAligned(getAddress(buffy)); + } + + /** + * This assumes cache line is 64b + */ + public static boolean isCacheAligned(ByteBuffer buffy) { + return isCacheAligned(getAddress(buffy)); + } + + public static boolean isPageAligned(long address) { + return (address & (PAGE_SIZE - 1)) == 0; + } + + /** + * This assumes cache line is 64b + */ + public static boolean isCacheAligned(long address) { + return (address & (CACHE_LINE_SIZE - 1)) == 0; + } + + public static boolean isAligned(long address, long align) { + if (Long.bitCount(align) != 1) { + throw new IllegalArgumentException("Alignment must be a power of 2"); + } + return (address & (align - 1)) == 0; + } +} + diff --git a/src/main/java/com/aparapi/examples/convolution/Convolution.java b/src/main/java/com/aparapi/examples/convolution/Convolution.java index c7ed5495c9cb103a1acca3a1a3fa3ac80beb5e9c..ef88a8adb6255ee0050809806a1744017a28422d 100644 --- a/src/main/java/com/aparapi/examples/convolution/Convolution.java +++ b/src/main/java/com/aparapi/examples/convolution/Convolution.java @@ -1,13 +1,3 @@ -/** - * This product currently only contains code developed by authors - * of specific components, as 
identified by the source code files. - * - * Since product implements StAX API, it has dependencies to StAX API - * classes. - * - * For additional credits (generally to people who reported problems) - * see CREDITS file. - */ /* Copyright (c) 2010-2011, Advanced Micro Devices, Inc. All rights reserved. @@ -44,87 +34,82 @@ to national security controls as identified on the Commerce Control List (curren of EAR). For the most current Country Group listings, or for additional information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/. - */ +*/ package com.aparapi.examples.convolution; -import com.aparapi.*; +import java.io.File; +import com.aparapi.Kernel; -import java.io.*; +public class Convolution{ -public class Convolution { + final static class ImageConvolution extends Kernel{ + private float convMatrix3x3[]; - public static void main(final String[] _args) throws IOException { + private int width, height; - final File file = new File(_args.length == 1 ? 
_args[0] : "./src/main/resources/testcard.jpg").getCanonicalFile(); + private byte imageIn[], imageOut[]; - final ImageConvolution convolution = new ImageConvolution(); + public void processPixel(int x, int y, int w, int h) { + float accum = 0f; + int count = 0; + for (int dx = -3; dx < 6; dx += 3) { + for (int dy = -1; dy < 2; dy += 1) { + int rgb = 0xff & imageIn[((y + dy) * w) + (x + dx)]; - final float convMatrix3x3[] = new float[] { - 0f, - -10f, - 0f, - -10f, - 40f, - -10f, - 0f, - -10f, - 0f, - }; - - new ConvolutionViewer(file, convMatrix3x3) { - - private static final long serialVersionUID = 7858079467616904028L; - - @Override - protected void applyConvolution(float[] _convMatrix3x3, byte[] _inBytes, byte[] _outBytes, int _width, - int _height) { - convolution.applyConvolution(_convMatrix3x3, _inBytes, _outBytes, _width, _height); + accum += rgb * convMatrix3x3[count++]; } - }; - } - - final static class ImageConvolution extends Kernel { - - private float convMatrix3x3[]; - - private int width, height; - - private byte imageIn[], imageOut[]; - - public void processPixel(int x, int y, int w, int h) { - float accum = 0f; - int count = 0; - for (int dx = -3; dx < 6; dx += 3) { - for (int dy = -1; dy < 2; dy += 1) { - final int rgb = 0xff & imageIn[((y + dy) * w) + (x + dx)]; - - accum += rgb * convMatrix3x3[count++]; - } - } - final byte value = (byte) (max(0, min((int) accum, 255))); - imageOut[(y * w) + x] = value; - - } - - @Override - public void run() { - final int x = getGlobalId(0) % (width * 3); - final int y = getGlobalId(0) / (width * 3); - - if ((x > 3) && (x < ((width * 3) - 3)) && (y > 1) && (y < (height - 1))) { - processPixel(x, y, width * 3, height); - } - - } - - public void applyConvolution(float[] _convMatrix3x3, byte[] _imageIn, byte[] _imageOut, int _width, int _height) { - imageIn = _imageIn; - imageOut = _imageOut; - width = _width; - height = _height; - convMatrix3x3 = _convMatrix3x3; - execute(3 * width * height); - } - } -} \ No 
newline at end of file + } + byte value = (byte) (max(0, min((int) accum, 255))); + imageOut[y * w + x] = value; + + } + + @Override public void run() { + int x = getGlobalId(0) % (width * 3); + int y = getGlobalId(0) / (width * 3); + + if (x > 3 && x < (width * 3 - 3) && y > 1 && y < (height - 1)) { + processPixel(x, y, width * 3, height); + } + + } + + public void applyConvolution(float[] _convMatrix3x3, byte[] _imageIn, byte[] _imageOut, int _width, int _height) { + imageIn = _imageIn; + imageOut = _imageOut; + width = _width; + height = _height; + convMatrix3x3 = _convMatrix3x3; + execute(3 * width * height); + } + + } + + public static void main(final String[] _args) { + File file = new File(_args.length == 1 ? _args[0] : "testcard.jpg"); + + final ImageConvolution convolution = new ImageConvolution(); + + float convMatrix3x3[] = new float[] { + 0f, + -10f, + 0f, + -10f, + 40f, + -10f, + 0f, + -10f, + 0f, + }; + + new ConvolutionViewer(file, convMatrix3x3){ + @Override protected void applyConvolution(float[] _convMatrix3x3, byte[] _inBytes, byte[] _outBytes, int _width, + int _height) { + convolution.applyConvolution(_convMatrix3x3, _inBytes, _outBytes, _width, _height); + } + }; + + } + +} diff --git a/src/main/java/com/aparapi/examples/extension/StopWatch.java b/src/main/java/com/aparapi/examples/extension/StopWatch.java index a5ac4b26913f59785258a452a26ab0f5f07ce267..2d365cdf9b18a358fc1d313c24374282c3030d1b 100644 --- a/src/main/java/com/aparapi/examples/extension/StopWatch.java +++ b/src/main/java/com/aparapi/examples/extension/StopWatch.java @@ -1,13 +1,3 @@ -/** - * This product currently only contains code developed by authors - * of specific components, as identified by the source code files. - * - * Since product implements StAX API, it has dependencies to StAX API - * classes. - * - * For additional credits (generally to people who reported problems) - * see CREDITS file. 
- */ package com.aparapi.examples.extension; public class StopWatch{ diff --git a/src/main/java/com/aparapi/examples/kmeans/KMeans.java b/src/main/java/com/aparapi/examples/kmeans/KMeans.java new file mode 100644 index 0000000000000000000000000000000000000000..6056197042ced5d9d9d2658c7d1aecdaaba25222 --- /dev/null +++ b/src/main/java/com/aparapi/examples/kmeans/KMeans.java @@ -0,0 +1,179 @@ +package com.hp.aparapi.sample.kmeans; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.FileReader; +import java.util.regex.Pattern; +import java.util.regex.Matcher; +import java.util.Scanner; +import java.util.HashMap; +import java.util.Map; +import java.util.ArrayList; + +import java.util.Random; + +import com.aparapi.Kernel; +import com.aparapi.Range; + +class KMeans { + + public int nrVectors; + public int nrClusters; + public int dim; + public float[] vectors; + public float[] clusters; + + public int[] clusterAssignedCPU; + public int[] clusterAssignedGPU; + + static public Random rand = new Random(); + + KMeans(int nrVectors, int nrClusters, int dim) { + this.nrVectors = nrVectors; + this.nrClusters = nrClusters; + this.dim = dim; + + this.vectors = new float[dim * nrVectors]; + this.clusters = new float[dim * nrClusters]; + + this.clusterAssignedCPU = new int[vectors.length]; + this.clusterAssignedGPU = new int[vectors.length]; + } + + void genVectors() { + float radius; + for (int i = 0; i < nrClusters * dim; i++) { + clusters[i] = 1000 * (float)rand.nextDouble(); + } + + // + // each cluster has stddev of 30 + // + for (int i = 0; i < nrVectors; i++) { + int clusterId = rand.nextInt(nrClusters); + for (int j = 0; j < dim; j++) { + vectors[i * dim + j] = clusters[clusterId * dim + j] + (float)rand.nextGaussian() * 30; + } + } + } + + void populateData() { + for (int i = 0; i < nrClusters * dim; i++) { + clusters[i] = clusters[i] * 2; + } + + for (int i = 0; i < nrVectors * dim; i++) { + vectors[i] = 
vectors[i] * 2; + } + + for (int i = 0; i < nrClusters * dim; i++) { + clusters[i] = clusters[i] / 2; + } + + for (int i = 0; i < nrVectors * dim; i++) { + vectors[i] = vectors[i] / 2; + } + } + + void runKmeansGPU() { + final int nrCluster = nrClusters; + final int vecSize = dim; + final int clusterVecSize = clusters.length; + final float[] inVecs = vectors; + final float[] inCluster = clusters; + final int[] clusterAssigned = clusterAssignedGPU; + + final Range range = Range.create(nrVectors, 16); + final float[] cluster_$local$ = new float[inCluster.length]; + + Kernel kernel = new Kernel(){ + + @Override + public void run() { + int gid = getGlobalId(0); + int lid = getLocalId(0); + int lsize = getLocalSize(0); + + for (int i = lid; i < clusterVecSize ; i += lsize) { + cluster_$local$[i] = inCluster[i]; + } + + localBarrier(); + + int vecBegin = gid * vecSize; + int minCluster = -1; + float minDist = Float.MAX_VALUE; + + for (int iC = 0; iC < nrCluster; iC++) { + float sum = 0.0f; + for (int iElm = 0; iElm < vecSize; iElm++) { + float diff = inVecs[vecBegin + iElm] - cluster_$local$[iC * vecSize + iElm]; + sum += diff * diff; + } + // !!! 
oren change -> this cmd will force use of doubles + //float dist = (float)Math.sqrt(sum); + float dist = sqrt(sum); + + if (dist < minDist) { + minCluster = iC; + minDist = dist; + } + } + + clusterAssigned[gid] = minCluster; + // this.atomicAdd(s0s, minCluster, 1); + + // int indexBegin = minCluster * vecSize; + // for (int i = 0; i < vecSize; i++) { + // float elm = inVecs[vecBegin + i]; + // s1s[indexBegin + i] += elm; + // s2s[indexBegin + i] += elm * elm; + // } + } + }; + + kernel.execute(range); + } + + void runKmeansCPU() { + final int nrCluster = nrClusters; + final int vecSize = dim; + final int clusterVecSize = clusters.length; + final float[] inVecs = vectors; + final float[] inCluster = clusters; + final int[] clusterAssigned = clusterAssignedCPU; + + for (int gid = 0; gid < nrVectors; gid++) { + int vecBegin = gid * vecSize; + int minCluster = -1; + float minDist = Float.MAX_VALUE; + + for (int iC = 0; iC < nrCluster; iC++) { + float sum = 0.0f; + for (int iElm = 0; iElm < vecSize; iElm++) { + float diff = inVecs[vecBegin + iElm] - inCluster[iC * vecSize + iElm]; + sum += diff * diff; + } + float dist = (float)Math.sqrt(sum); + + if (dist < minDist) { + minCluster = iC; + minDist = dist; + } + } + clusterAssigned[gid] = minCluster; + } + } + + void checkResults() { + for (int i = 0; i < clusterAssignedCPU.length; i++) { + if (clusterAssignedCPU[i] != clusterAssignedGPU[i]) { + System.err.println(String.format("Cluster differs: CPU[%d]=%d, GPU[%d]=%d", + i, clusterAssignedCPU[i], + i, clusterAssignedGPU[i])); + } + } + } + +} diff --git a/src/main/java/com/aparapi/examples/kmeans/Main.java b/src/main/java/com/aparapi/examples/kmeans/Main.java new file mode 100644 index 0000000000000000000000000000000000000000..0d7f922a433bc01177327fdc3ddd4420218573c8 --- /dev/null +++ b/src/main/java/com/aparapi/examples/kmeans/Main.java @@ -0,0 +1,77 @@ +/* +HP All rights reserved... 
+ + +*/ + +package com.hp.aparapi.sample.kmeans; + +import java.awt.Color; +import java.awt.Dimension; +import java.awt.Graphics; +import java.awt.Point; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.awt.image.BufferedImage; +import java.awt.image.DataBufferInt; +import java.util.List; + +import javax.swing.JComponent; +import javax.swing.JFrame; + +import com.aparapi.Kernel; +import com.aparapi.ProfileInfo; +import com.aparapi.Range; + +/** + * An example Aparapi application which displays a view of the Mandelbrot set and lets the user zoom in to a particular point. + * + * When the user clicks on the view, this example application will zoom in to the clicked point and zoom out there after. + * On GPU, additional computing units will offer a better viewing experience. On the other hand on CPU, this example + * application might suffer with sub-optimal frame refresh rate as compared to GPU. + * + * @author gfrost + * + */ + +public class Main{ + + + @SuppressWarnings("serial") public static void main(String[] _args) + { + + int size = Integer.getInteger("size", (1 << 20)); + int clusters = Integer.getInteger("clusters", 20); + int dim = Integer.getInteger("dim", 20); + //Range range = Range.create(size); + System.out.println("Data size =" + size); + System.out.println("Num of Clusters =" + clusters); + System.out.println("Dim size =" + dim); + + //Range range = Range.create(512); + + KMeans km = new KMeans(size, clusters, dim); + km.genVectors(); + km.populateData(); + //km.runKmeansGPU(); + + long startMillis = System.currentTimeMillis(); + + // Set the scale and offset, execute the kernel and force a repaint of the viewer. 
+ km.runKmeansGPU(); //.execute(range); + + long elapsedMillis = System.currentTimeMillis() - startMillis; + System.out.println("GPU - Elapsed time in milli = " + elapsedMillis); + + startMillis = System.currentTimeMillis(); + km.runKmeansCPU(); + elapsedMillis = System.currentTimeMillis() - startMillis; + System.out.println("CPU - Elapsed time in milli = " + elapsedMillis); + + km.checkResults(); + + } + + } diff --git a/src/main/java/com/aparapi/examples/life/Main.java b/src/main/java/com/aparapi/examples/life/Main.java index 263b906148c9741a2a6750a6c0c01ac8bde1c4ce..900b44c1211864ffc8b11c19c29cf9f12823bd4d 100644 --- a/src/main/java/com/aparapi/examples/life/Main.java +++ b/src/main/java/com/aparapi/examples/life/Main.java @@ -193,6 +193,9 @@ public class Main{ public static void main(String[] _args) { final JFrame frame = new JFrame("Game of Life"); + //final int width = 400;//Integer.getInteger("width", 1024 + 512 + 256 + 128); + + //final int height = 400;//Integer.getInteger("height", 768 + 256); final int width = Integer.getInteger("width", 1024 + 512 + 256 + 128); final int height = Integer.getInteger("height", 768 + 256); diff --git a/src/main/java/com/aparapi/examples/nbody/Main.java b/src/main/java/com/aparapi/examples/nbody/Main.java index 85e3f8f394894cd5d79d5e49d97d640b2d425b20..10e3fed8b119e2624d81d94a2aa41948e0d02ca3 100644 --- a/src/main/java/com/aparapi/examples/nbody/Main.java +++ b/src/main/java/com/aparapi/examples/nbody/Main.java @@ -210,6 +210,10 @@ public class Main{ public static boolean running; static Texture texture; + + // !!! oren fix. first time not calculated properly. Should be set on start!!! 
+ private static long last = 0;//System.currentTimeMillis(); + public static void main(String _args[]) { @@ -228,6 +232,7 @@ public class Main{ startButton.addActionListener(new ActionListener(){ @Override public void actionPerformed(ActionEvent e) { running = true; + last = System.currentTimeMillis(); startButton.setEnabled(false); } }); @@ -277,8 +282,15 @@ public class Main{ public final float zoomFactor = 1.0f; private int frames; + + private float totalCalcPerMicroSec = 0.0f; + private float totalFramePerSec = 0.0f; + private int totalSamples = 0; + private boolean firstMeasurment=true; - private long last = System.currentTimeMillis(); + + // !!! oren fix. first time not calculated properly. Should be set on start!!! + //private long last = 0;//System.currentTimeMillis(); @Override public void dispose(GLAutoDrawable drawable) { @@ -305,7 +317,7 @@ public class Main{ final List<ProfileInfo> profileInfo = kernel.getProfileInfo(); if ((profileInfo != null) && (profileInfo.size() > 0)) { for (final ProfileInfo p : profileInfo) { - System.out.print(" " + p.getType() + " " + p.getLabel() + ((p.getEnd() - p.getStart()) / 1000) + "us"); + System.out.print(" " + p.getType() + " " + p.getLabel() + " " +((p.getEnd() - p.getStart()) / 1000) + "us"); } System.out.println(); } @@ -321,6 +333,21 @@ public class Main{ final float framesPerSecond = (frames * 1000.0f) / time; final int updatesPerMicroSecond = (int) ((framesPerSecond * kernel.range.getGlobalSize(0) * kernel.range .getGlobalSize(0)) / 1000000); + /// !!! 
oren change -> add avg calc + // the first calculation is never accurate when the system is initializing so discard it + if(!firstMeasurment) + { + totalCalcPerMicroSec += updatesPerMicroSecond; + totalFramePerSec += framesPerSecond; + totalSamples+=1;//(time/1000.0f); + // only print once every 10s + //if((int)totalTimeSec%10==0) + System.out.printf("last(CPMC,FPS)=(%d,%5.2f)",updatesPerMicroSecond,framesPerSecond); + System.out.printf("avg(CPMC,FPS)=(%5.2f,%5.2f)\n",totalCalcPerMicroSec/totalSamples,totalFramePerSec/totalSamples); + } + else + firstMeasurment = false; + ////////////////////////////////// framesPerSecondTextField.setText(String.format("%5.2f", framesPerSecond)); positionUpdatesPerMicroSecondTextField.setText(String.format("%4d", updatesPerMicroSecond)); } diff --git a/wiki-collateral/ProfilingKernelsFormEclipseProject.zip b/wiki-collateral/ProfilingKernelsFormEclipseProject.zip new file mode 100644 index 0000000000000000000000000000000000000000..28566548ee71557b596ae82e1aed6586f5c65bf0 Binary files /dev/null and b/wiki-collateral/ProfilingKernelsFormEclipseProject.zip differ