diff --git a/documentation/aparapi-patterns.html b/documentation/aparapi-patterns.html index 90ec2f6d4c9ea9d06fd6861e3a11b611d1e35dc7..4f554e6aac681b0cbe9b2a617cc3dcb9b3e6d129 100644 --- a/documentation/aparapi-patterns.html +++ b/documentation/aparapi-patterns.html @@ -1,240 +1,242 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Aparapi Patterns - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' 
type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant 
Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header 
center-on-small-only'>Aparapi Patterns</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Examples and code fragments to demonstrate Aparapi features.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Aparapi Patterns</h2> - - <p>The following suggestions help solve some common problems found in using Aparapi.</p> - - <p>Additional suggestions and solutions to extend this list would be welcome.</p> - - <h2>How do I return data from a kernel if I can’t write to kernel fields?</h2> - - <p>Use a small array buffer (possibly containing a single element) and assign it from the kernel.</p> - - <p>For example, the following kernel code detects whether the <code>buffer[]</code> contains the value <code>1234</code>. The flag (true or false) is returned in <code>found[0]</code>.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">buffer</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">boolean</span> <span class="n">found</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">boolean</span><span class="o">[]{</span><span class="kc">false</span><span class="o">};</span> - <span class="c1">// fill buffer somehow</span> - <span class="n">kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">buffer</span><span class="o">[</span><span class="n">getGlobald</span><span class="o">()]==</span><span class="mi">1234</span><span 
class="o">){</span> - <span class="n">found</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="kc">true</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - </code></pre> - <p>This code does include a race condition, whereby more than one value of <code>Kernel.getGlobalId()</code> might contain 1234 and try to set <code>found[0]</code>. This is not a problem here, because we don’t care if multiple kernel executions match, provided one flips the value of <code>found[0]</code>.</p> - - <h2>How can I use Aparapi and still maintain an object-oriented view of my data?</h2> - - <p>See the NewFeatures page. Aparapi can now handle simple arrays of objects, which minimizes the amount of refactoring required to experiment with Aparapi. However, performance is still likely to be better if your algorithm operates on data held in parallel primitive arrays. To get higher performance from Aparapi with minimal exposure to data in this parallel primitive array form, we can (with a little work) allow both forms of data to co-exist. 
Let’s reconsider <a href="http://en.wikipedia.org/wiki/N-body_problem" rel="nofollow">the NBody problem</a>.</p> - - <p>A Java developer writing an NBody solution would most likely create a Body class:</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> - <span class="kt">float</span> <span class="n">x</span><span class="o">,</span><span class="n">y</span><span class="o">,</span><span class="n">z</span><span class="o">;</span> - <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span><span class="k">return</span> <span class="n">x</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">x</span> <span class="o">=</span> <span class="n">_x</span><span class="o">;}</span> - <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span><span class="k">return</span> <span class="n">y</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">y</span> <span class="o">=</span> <span class="n">_y</span><span class="o">;}</span> - <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span><span class="k">return</span> <span class="n">z</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">z</span> <span class="o">=</span> <span class="n">_z</span><span class="o">;}</span> - - - <span class="c1">// other data related to Body unused by positioning calculations</span> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta 
content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Aparapi Patterns +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Aparapi Patterns</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Examples and code fragments to demonstrate Aparapi features.</h4> +</div> + +</div> + +</div> +<div class='container'> +<h2>Aparapi Patterns</h2> + +<p>The following suggestions help solve some common problems found in using Aparapi.</p> + +<p>Additional suggestions and solutions to extend this list would be 
welcome.</p> + +<h2>How do I return data from a kernel if I can’t write to kernel fields?</h2> + +<p>Use a small array buffer (possibly containing a single element) and assign it from the kernel.</p> + +<p>For example, the following kernel code detects whether the <code>buffer[]</code> contains the value <code>1234</code>. The flag (true or false) is returned in <code>found[0]</code>.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">buffer</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">boolean</span> <span class="n">found</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">boolean</span><span class="o">[]{</span><span class="kc">false</span><span class="o">};</span> +<span class="c1">// fill buffer somehow</span> + <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">buffer</span><span class="o">[</span><span class="n">getGlobalId</span><span class="o">()]==</span><span class="mi">1234</span><span class="o">){</span> + <span class="n">found</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="kc">true</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span 
class="na">length</span><span class="o">);</span> +</code></pre></div> +<p>This code does include a race condition: the elements at more than one <code>Kernel.getGlobalId()</code> index might contain 1234, so several kernel executions may try to set <code>found[0]</code>. This is not a problem here, because every such execution writes the same value (<code>true</code>); we don’t care if multiple kernel executions match, provided one flips the value of <code>found[0]</code>.</p> + +<h2>How can I use Aparapi and still maintain an object-oriented view of my data?</h2> + +<p>See the <a href="/documentation/new-features.html">New Features</a> page. Aparapi can now handle simple arrays of objects, which minimizes the amount of refactoring required to experiment with Aparapi. However, performance is still likely to be better if your algorithm operates on data held in parallel primitive arrays. To get higher performance from Aparapi with minimal exposure to data in this parallel primitive array form, we can (with a little work) allow both forms of data to co-exist. Let’s reconsider <a href="http://en.wikipedia.org/wiki/N-body_problem" rel="nofollow">the NBody problem</a>.</p> + +<p>A Java developer writing an NBody solution would most likely create a Body class:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> + <span class="kt">float</span> <span class="n">x</span><span class="o">,</span><span class="n">y</span><span class="o">,</span><span class="n">z</span><span class="o">;</span> + <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span><span class="k">return</span> <span class="n">x</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">x</span> <span class="o">=</span> <span class="n">_x</span><span class="o">;}</span> + <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span><span class="k">return</span> 
<span class="n">y</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">y</span> <span class="o">=</span> <span class="n">_y</span><span class="o">;}</span> + <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span><span class="k">return</span> <span class="n">z</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">z</span> <span class="o">=</span> <span class="n">_z</span><span class="o">;}</span> + + + <span class="c1">// other data related to Body unused by positioning calculations</span> +<span class="o">}</span> +</code></pre></div> +<p>The developer would also likely create a container class (such as NBodyUniverse) that manages the positions of multiple Body instances.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">NBodyUniverse</span><span class="o">{</span> + <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span><span class="o">;</span> + <span class="n">NBodyUniverse</span><span class="o">(</span><span class="kd">final</span> <span class="n">Body</span> <span class="n">_bodies</span><span class="o">[]){</span> + <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o">&lt;</span><span class="n">bodies</span><span class="o">.</span><span class="na">length</span><span 
class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setX</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setY</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setZ</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> <span class="o">}</span> - </code></pre> - <p>The developer would also likely create a container class (such as NBodyUniverse), that manages the positions of multiple Body instances.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">NBodyUniverse</span><span class="o">{</span> - <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span> <span class="o">=</span> <span class="kc">null</span><span class="o">;</span> - <span class="n">NBodyUniverse</span><span class="o">(</span><span class="kd">final</span> <span class="n">Bodies</span> <span class="n">_bodies</span><span class="o">[]){</span> - <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span 
class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setX</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setY</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setZ</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> - <span class="c1">// can use new array of object Aparapi features, but is not performant</span> - <span class="o">}</span> + <span class="o">}</span> + <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> + <span class="c1">// can use new array of object Aparapi features, but is not performant</span> + <span class="o">}</span> +<span class="o">}</span> +<span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">[</span><span class="n">BODIES</span><span class="o">];</span> +<span class="k">for</span> <span class="o">(</span><span 
class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o">&lt;</span><span class="n">bodies</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">();</span> +<span class="o">}</span> +<span class="n">NBodyUniverse</span> <span class="n">universe</span> <span class="o">=</span> <span class="k">new</span> <span class="n">NBodyUniverse</span><span class="o">(</span><span class="n">bodies</span><span class="o">);</span> +<span class="k">while</span> <span class="o">(</span><span class="kc">true</span><span class="o">){</span> + <span class="n">universe</span><span class="o">.</span><span class="na">adjustPositions</span><span class="o">();</span> + <span class="c1">// display NBodyUniverse</span> +<span class="o">}</span> +</code></pre></div> +<p>The <code>NBodyUniverse.adjustPositions()</code> method contains the nested loops (adjusting each body position based on forces impinging on it from all of the other bodies), making it an ideal Aparapi candidate.</p> + +<p>Even though this code can now be written by accessing the x, y and z ordinates of <code>Body[]</code> via getters/setters, the most performant Aparapi implementation is the one that operates on parallel arrays of floats containing x, y and z ordinates, with <code>Body[10]</code>’s state conceptually stored across <code>x[10]</code>, <code>y[10]</code> and <code>z[10]</code>.</p> + +<p>So for performance reasons, you can do something like this:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">idx</span><span class="o">;</span> + <span 
class="n">NBodyUniverse</span> <span class="n">universe</span><span class="o">;</span> + <span class="kt">void</span> <span class="nf">setUniverseAndIndex</span><span class="o">(</span><span class="n">NBodyUniverse</span> <span class="n">_universe</span><span class="o">,</span> <span class="kt">int</span> <span class="n">_idx</span><span class="o">){</span> + <span class="n">universe</span> <span class="o">=</span> <span class="n">_universe</span><span class="o">;</span> + <span class="n">idx</span> <span class="o">=</span> <span class="n">_idx</span><span class="o">;</span> + <span class="o">}</span> + + <span class="c1">// other fields not used by layout</span> + + <span class="kt">void</span> <span class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">x</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_x</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">y</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_y</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">z</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_z</span><span class="o">;}</span> + <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">x</span><span 
class="o">[</span><span class="n">idx</span><span class="o">];}</span> + <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">y</span><span class="o">[</span><span class="n">idx</span><span class="o">];}</span> + <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">z</span><span class="o">[</span><span class="n">idx</span><span class="o">];}</span> +<span class="o">}</span> +<span class="kd">class</span> <span class="nc">NBodyUniverse</span> <span class="o">{</span> + <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span><span class="o">;</span> + <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">x</span><span class="o">,</span> <span class="n">y</span><span class="o">,</span> <span class="n">z</span><span class="o">;</span> + <span class="n">NBodyUniverse</span><span class="o">(</span><span class="n">Body</span><span class="o">[]</span> <span class="n">_bodies</span><span class="o">){</span> + <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setUniverseAndIndex</span><span class="o">(</span><span class="k">this</span><span class="o">,</span> 
<span class="n">i</span><span class="o">);</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setX</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setY</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setZ</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> <span class="o">}</span> - <span class="n">Body</span> <span class="n">bodies</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">[</span><span class="n">BODIES</span><span class="o">];</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">();</span> - <span class="o">}</span> - <span class="n">NBodyUniverse</span> <span class="n">universe</span> <span class="o">=</span> <span class="k">new</span> <span class="n">NBodyUniverse</span><span class="o">(</span><span 
class="n">bodies</span><span class="o">);</span> - <span class="k">while</span> <span class="o">(</span><span class="kc">true</span><span class="o">){</span> - <span class="n">universe</span><span class="o">.</span><span class="na">adjustPositions</span><span class="o">();</span> - <span class="c1">// display NBodyUniverse</span> - <span class="o">}</span> - </code></pre> - <p>The <code>NBodyUniverse.adjustPostions()</code> method contains the nested loops (adjusting each body position based on forces impinging on it from all of the other bodies), making it an ideal Aparapi candidate.</p> - - <p>Even though this code can now be written by accessing the x, y and z ordinates of <code>Body[]</code> via getters/setters, the most performant Aparapi implementation is the one that operates on parallel arrays of floats containing x, y and z ordinates, with <code>Body[10]</code>’s state conceptually stored across <code>x[10]</code>, <code>y[10]</code> and <code>z[10]</code>.</p> - - <p>So for performance reasons, you can do something like this:</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">idx</span><span class="o">;</span> - <span class="n">NBodyUniverse</span> <span class="n">universe</span><span class="o">;</span> - <span class="kt">void</span> <span class="nf">setUniverseAndIndex</span><span class="o">(</span><span class="n">NBodyUniverse</span> <span class="n">_universe</span><span class="o">,</span> <span class="kt">int</span> <span class="n">_idx</span><span class="o">){</span> - <span class="n">universe</span> <span class="o">=</span> <span class="n">_universe</span><span class="o">;</span> - <span class="n">idx</span> <span class="o">=</span> <span class="n">_idx</span><span class="o">;</span> - <span class="o">}</span> - - <span class="c1">// other fields not used by layout</span> - - <span class="kt">void</span> <span 
class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">x</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_x</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">y</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_y</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">layout</span><span class="o">.</span><span class="na">z</span><span class="o">[</span><span class="n">idx</span><span class="o">]=</span><span class="n">_z</span><span class="o">;}</span> - <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">x</span><span class="o">[</span><span class="n">idx</span><span class="o">];}</span> - <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">y</span><span class="o">[</span><span class="n">idx</span><span class="o">];}</span> - <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span> <span class="k">return</span> <span class="n">layout</span><span class="o">.</span><span class="na">z</span><span class="o">[</span><span class="n">idx</span><span class="o">];}</span> - <span class="o">}</span> - <span class="kd">class</span> <span class="nc">NBodyUniverse</span> <span 
class="o">{</span> - <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">x</span><span class="o">,</span> <span class="n">y</span><span class="o">,</span> <span class="n">z</span><span class="o">;</span> - <span class="n">NBodyUniverse</span><span class="o">(</span><span class="n">Body</span><span class="o">[]</span> <span class="n">_bodies</span><span class="o">){</span> - <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setUniverseAndIndex</span><span class="o">(</span><span class="k">this</span><span class="o">,</span> <span class="n">i</span><span class="o">);</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setX</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setY</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span 
class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setZ</span><span class="o">(</span><span class="n">Math</span><span class="o">.</span><span class="na">random</span><span class="o">()*</span><span class="mi">100</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> - <span class="c1">// can now more efficiently use Aparapi</span> - <span class="o">}</span> - <span class="o">}</span> - - - - <span class="n">Body</span> <span class="n">bodies</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">[</span><span class="n">BODIES</span><span class="o">];</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">();</span> - <span class="o">}</span> - <span class="n">NBodyUniverse</span> <span class="n">universe</span> <span class="o">=</span> <span class="k">new</span> <span class="n">NBodyUniverse</span><span class="o">(</span><span class="n">bodies</span><span class="o">);</span> - <span class="k">while</span> <span class="o">(</span><span class="kc">true</span><span class="o">){</span> - <span class="n">universe</span><span class="o">.</span><span class="na">adjustPositions</span><span class="o">();</span> - <span class="c1">// display NBodyUniverse</span> - <span class="o">}</span> - </code></pre> - <p>This example allows Javaâ„¢ code to treat each Body in a 
traditional object-oriented fashion and also allows Aparapi kernels to act on the parallel primitive array form, in order to access/mutate the position of the bodies.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQK
EwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> + <span class="o">}</span> + <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> + <span class="c1">// can now more efficiently use Aparapi</span> + <span class="o">}</span> +<span class="o">}</span> + + + +<span class="n">Body</span> <span class="n">bodies</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">[</span><span class="n">BODIES</span><span class="o">];</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">bodies</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="n">bodies</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Body</span><span class="o">();</span> +<span class="o">}</span> +<span class="n">NBodyUniverse</span> <span class="n">universe</span> <span class="o">=</span> <span class="k">new</span> <span class="n">NBodyUniverse</span><span class="o">(</span><span class="n">bodies</span><span class="o">);</span> +<span class="k">while</span> <span class="o">(</span><span class="kc">true</span><span class="o">){</span> + <span class="n">universe</span><span class="o">.</span><span class="na">adjustPositions</span><span class="o">();</span> + <span class="c1">// display NBodyUniverse</span> +<span class="o">}</span> +</code></pre></div> +<p>This example allows Javaâ„¢ code to treat each Body in a traditional object-oriented fashion and also allows Aparapi kernels to act on the parallel primitive array form, in order to access/mutate the position of the bodies.</p> + +</div> +</main> +<footer 
class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALS
csTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/choosing-specific-devices.html b/documentation/choosing-specific-devices.html index 27b9fbbfe8c717f58d2cc3923f08938d832827be..9e016507c2cc51682c3d2faaddf904438dfefcff 100644 --- a/documentation/choosing-specific-devices.html +++ b/documentation/choosing-specific-devices.html @@ -1,171 +1,173 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Choosing Specific Devices - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav 
fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA 
Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Choosing Specific Devices</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Using the new Device API's to choose Kernel execution on a specific device.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>Previously Aparapi chose the first GPU device when <code>Kernel.execute()</code> was called. This make it easy to execute simple Kernels, but was problematic when users wished finer control over which device should be chosen. Especially when the first device may be unsuitable. We recently added new classes and API’s to allow the developer to specify exactly which device we intend to target.</p> - - <p>A new Device class has been added. This allows the user to select a specific device; either by calling a helper method <code>Device.firstGPU()</code> or <code>Device.best()</code>. Or by allowing the user to iterate through all devices and choose one based on some other criteria (capabilities? 
vendor name?).</p> - - <p>So selecting the ‘best’ (most performant) device could be achieved using.</p> - <pre class="highlight java"><code> - <span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> - </code></pre> - <p>Alternatively if I wanted the first AMD GPU device I might use:-</p> - <pre class="highlight java"><code> - <span class="n">Device</span> <span class="n">chosen</span><span class="o">=</span><span class="kc">null</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Device</span> <span class="nl">device:</span> <span class="n">devices</span><span class="o">.</span><span class="na">getAll</span><span class="o">()){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">device</span><span class="o">.</span><span class="na">getVendor</span><span class="o">().</span><span class="na">contains</span><span class="o">(</span><span class="s">"AMD"</span><span class="o">)</span> <span class="o">&&</span> <span class="n">device</span><span class="o">.</span><span class="na">isGPU</span><span class="o">()){</span> - <span class="n">chosen</span> <span class="o">=</span> <span class="n">device</span><span class="o">;</span> - <span class="k">break</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>A Device can be queried (<code>isGPU()</code>, <code>isOpenCL()</code>, <code>isGroup()</code>, <code>isJava()</code>, <code>getOpenCLPlatform()</code>, <code>getMaxMemory()</code>, <code>getLocalSizes()</code>) to yield it’s characteristics.</p> - - <p>To execute on a specific device we must use the device to create our range.</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span 
class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> - </code></pre> - <p>This allows the Range to be created with knowledge of the underlying device. So for example <code>device.createRange3D(1024, 1024, 1024, 16, 16, 16)</code> will fail if the device does not allow a local size of (16x16x16).</p> - - <p>A range created using a device method captures the device which created it. The range instance has a device field which is set by the device which creates it.</p> - - <p>It’s as if we had this code</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> - <span class="n">range</span><span class="o">.</span><span class="na">setDevice</span><span class="o">(</span><span class="n">device</span><span class="o">);</span> - </code></pre> - <p>So the Range locks the device that it can be used with.</p> - - <p>Now when we have a Kernel.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="o">...</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>And we then use a device created range.</p> - <pre class="highlight java"><code> - <span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">();</span> - <span 
class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// uses input[];</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>We have forced execution on the first GPU.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Choosing Specific Devices +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Choosing Specific Devices</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Using the new Device APIs to choose Kernel execution on a specific device.</h4> +</div> + +</div> +
+</div> +<div class='container'> +<p>Previously Aparapi chose the first GPU device when <code>Kernel.execute()</code> was called. This made it easy to execute simple Kernels, but was problematic when users wished finer control over which device should be chosen. Especially when the first device may be unsuitable. We recently added new classes and APIs to allow the developer to specify exactly which device we intend to target.</p> + +<p>A new Device class has been added. This allows the user to select a specific device; either by calling a helper method <code>Device.firstGPU()</code> or <code>Device.best()</code>. Or by allowing the user to iterate through all devices and choose one based on some other criteria (capabilities? vendor name?).</p> + +<p>So selecting the ‘best’ (most performant) device could be achieved using.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> +</code></pre></div> +<p>Alternatively if I wanted the first AMD GPU device I might use:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">chosen</span><span class="o">=</span><span class="kc">null</span><span class="o">;</span> +<span class="k">for</span> <span class="o">(</span><span class="n">Device</span> <span class="nl">device:</span> <span class="n">devices</span><span class="o">.</span><span class="na">getAll</span><span class="o">()){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">device</span><span class="o">.</span><span class="na">getVendor</span><span class="o">().</span><span class="na">contains</span><span class="o">(</span><span class="s">"AMD"</span><span class="o">)</span> <span class="o">&&</span> <span class="n">device</span><span class="o">.</span><span class="na">isGPU</span><span
class="o">()){</span> + <span class="n">chosen</span> <span class="o">=</span> <span class="n">device</span><span class="o">;</span> + <span class="k">break</span><span class="o">;</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>A Device can be queried (<code>isGPU()</code>, <code>isOpenCL()</code>, <code>isGroup()</code>, <code>isJava()</code>, <code>getOpenCLPlatform()</code>, <code>getMaxMemory()</code>, <code>getLocalSizes()</code>) to yield its characteristics.</p> + +<p>To execute on a specific device we must use the device to create our range.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> +</code></pre></div> +<p>This allows the Range to be created with knowledge of the underlying device. So for example <code>device.createRange3D(1024, 1024, 1024, 16, 16, 16)</code> will fail if the device does not allow a local size of (16x16x16).</p> + +<p>A range created using a device method captures the device which created it.
The range instance has a device field which is set by the device which creates it.</p> + +<p>It’s as if we had this code</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> +<span class="n">range</span><span class="o">.</span><span class="na">setDevice</span><span class="o">(</span><span class="n">device</span><span class="o">);</span> +</code></pre></div> +<p>So the Range locks the device that it can be used with.</p> + +<p>Now when we have a Kernel.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="o">...</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>And we then use a device created range.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">();</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// uses input[];</span> + <span class="o">}</span> +<span class="o">};</span> +<span 
class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>We have forced execution on the first GPU.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/constant-memory.html b/documentation/constant-memory.html index dfd38d6ad5907d14abfafa736bc2a60e64f2a788..f0ce7cf37fbcfb4cfd11944f4e7ff6fd288e5cc1 100644 --- a/documentation/constant-memory.html +++ b/documentation/constant-memory.html @@ -1,167 +1,169 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Constant Memory - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Constant Memory</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to make use of constant memory in a Kernel.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>How to make use of new constant memory feature</h2> - - <p>By default all primitive arrays accessed by an Aparapi Kernel is considered global. 
If we look at the generated code using <code>-Dcom.aparapi.enableShowGeneratedOpenCL=true</code> we will see that primitive arrays (such as <code>int buf[]</code>) are mapped to <code>__global</code> pointers (such as <code>__global int *buf</code>) in OpenCL.</p> - - <p>Although this makes Aparapi easy to use (especially to Java developers who are unfamiliar to tiered memory hierarchies), it does limit the ability of the ‘power developer’ wanting to extract more performance from Aparapi on the GPU.</p> - - <p>This <a href="http://www.amd.com/us/products/technologies/stream-technology/opencl/pages/opencl-intro.aspx?cmpid=cp_article_2_2010" rel="nofollow">page</a> from AMD’s website shows the different types of memory that OpenCL programmers can exploit.</p> - - <p>Global memory buffers in Aparapi (primitive Java arrays) are stored in host memory and are copied to Global memory (the RAM of the GPU card).</p> - - <p>Local memory is 'closer’ to the compute devices and not copied from the host memory, it is just allocated for use on the device. The use of local memory on OpenCL can lead to much more performant code as the cost of fetching from local memory is much lower.</p> - - <p>Local memory is shared by all work item’s (kernel instances) executing in the same group. This is why the use of local memory was deferred until we had a satisfactory mechanism for specifying a required group size.</p> - - <p>We recently also added support for constant memory for data that needs to be written once to the GPU but will not change.</p> - - <p>Aparapi only supports constant arrays, not scalers.</p> - - <h2>How to define a primitive array as “constant”</h2> - - <p>We have two ways define a constant buffer. 
Either we can decorate the variable name with a _$constant$ suffix (yes it is a valid identifier n Java).</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer_$constant</span><span class="err">$</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">3</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">5</span><span class="o">,</span><span class="mi">6</span><span class="o">,</span><span class="mi">7</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">9</span><span class="o">}</span> <span class="c1">// this is a constant buffer</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access buffer_$constant$</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Alternatively (if defining inside the derived Kernel class - cannot be used via anonymous inner class pattern above!) 
we can can use the @Constant annotation.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Constant</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">constantBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">3</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">5</span><span class="o">,</span><span class="mi">6</span><span class="o">,</span><span class="mi">7</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">9</span><span class="o">}</span> <span class="c1">// this is a constant buffer</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access constantBuffers</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <h2>Can I see some code?</h2> - - <p>I updated the Mandelbrot example so that the pallete of RGB values is represented using constant memory, the source can be found here. Look at line #95. 
BTW for me this resulted in a 5-7 % performance improvement.</p> - - <p><a href="tp://code.google.com/p/aparapi/source/browse/trunk/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java" rel="nofollow">http://code.google.com/p/aparapi/source/browse/trunk/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java</a></p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkq
hkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Constant Memory +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Constant Memory</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to make use of constant memory in a Kernel.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<h2>How to make use of the new constant memory feature</h2> + +<p>By default all primitive arrays accessed by an Aparapi Kernel are considered global. If we look at the generated code using <code>-Dcom.aparapi.enableShowGeneratedOpenCL=true</code> we will see that primitive arrays (such as <code>int buf[]</code>) are mapped to <code>__global</code> pointers (such as <code>__global int *buf</code>) in OpenCL.</p> + +<p>Although this makes Aparapi easy to use (especially for Java developers who are unfamiliar with tiered memory hierarchies), it does limit the ability of the ‘power developer’ wanting to extract more performance from Aparapi on the GPU.</p> + +<p>This <a href="http://www.amd.com/us/products/technologies/stream-technology/opencl/pages/opencl-intro.aspx?cmpid=cp_article_2_2010" rel="nofollow">page</a> from AMD’s website shows the different types of memory that OpenCL programmers can exploit.</p> + +<p>Global memory buffers in Aparapi (primitive Java arrays) are stored in host memory and are copied to Global memory (the RAM of the GPU card).</p> + +<p>Local memory is ‘closer’ to the compute devices and not copied from the host memory; it is just allocated for use on the device. The use of local memory on OpenCL can lead to much more performant code as the cost of fetching from local memory is much lower.</p> + +<p>Local memory is shared by all work items (kernel instances) executing in the same group. This is why the use of local memory was deferred until we had a satisfactory mechanism for specifying a required group size.</p> + +<p>We recently also added support for constant memory for data that needs to be written once to the GPU but will not change.</p> + +<p>Aparapi only supports constant arrays, not scalars.</p> + +<h2>How to define a primitive array as “constant”</h2> + +<p>We have two ways to define a constant buffer. 
Either we can decorate the variable name with a _$constant$ suffix (yes it is a valid identifier in Java).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer_$constant</span><span class="err">$</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">3</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">5</span><span class="o">,</span><span class="mi">6</span><span class="o">,</span><span class="mi">7</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">9</span><span class="o">}</span> <span class="c1">// this is a constant buffer</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access buffer</span> + <span class="c1">// access buffer_$constant$</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Alternatively (if defining inside the derived Kernel class - cannot be used via anonymous inner class pattern above!) 
we can use the @Constant annotation.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Constant</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">constantBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">3</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">5</span><span class="o">,</span><span class="mi">6</span><span class="o">,</span><span class="mi">7</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">9</span><span class="o">}</span> <span class="c1">// this is a constant buffer</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access buffer</span> + <span class="c1">// access constantBuffer</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<h2>Can I see some code?</h2> + +<p>I updated the Mandelbrot example so that the palette of RGB values is represented using constant memory; the source can be found at the link below. Look at line #95. 
BTW for me this resulted in a 5-7 % performance improvement.</p> + +<p><a href="tp://code.google.com/p/aparapi/source/browse/trunk/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java" rel="nofollow">http://code.google.com/p/aparapi/source/browse/trunk/samples/mandel/src/com/amd/aparapi/sample/mandel/Main.java</a></p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQ
EWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/converting-java-to-opencl.html b/documentation/converting-java-to-opencl.html index 07d44024fc6098821cfd79adbd9d86ff73aad092..b754ecccc8bab29b3f5e933e35a43233870f427f 100644 --- a/documentation/converting-java-to-opencl.html +++ b/documentation/converting-java-to-opencl.html @@ -1,396 +1,397 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Converting Java to OpenCL - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav 
fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA 
Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Converting Java to OpenCL</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How Aparapi converts bytecode to OpenCL</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Introduction</h2> - - <p>This page acts as a quick summary for <a href="/documentation/ByteCode2OpenCL.pdf" rel="nofollow">the more detailed PDF</a> that was originally written by AMD.</p> - - <p>One of the unique Aparapi features is it’s ability to convert Java bytecode to OpenCL automatically.</p> - - <p>In this page we will try to describe the process used to perform this conversion. If you are unfamiliar with bytecode consider visiting this page WhatIsBytecode.</p> - - <p>The command</p> - <pre class="highlight java"><code> - <span class="n">javac</span> <span class="n">Source</span><span class="o">.</span><span class="na">java</span> - </code></pre> - <p>Will compile the java source file Source.java to Source.class</p> - - <p>The classfile format is well documented here and we will not go into too much detail here, however it should be known that Aparapi must parse the classfile of each Kernel to extract the bytecode for the <code>Kernel.run()</code> and any method reachable from <code>Kernel.run()</code>.</p> - - <p>Lets start with a simple Kernel.</p> - <pre class="highlight java"><code> - <span class="kn">import</span> <span class="nn">com.aparapi.Kernel</span><span class="o">;</span> - - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">;</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span 
class="o">;</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> - <span class="n">out</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>We will compile this</p> - <pre class="highlight java"><code> - <span class="n">javac</span> <span class="o">-</span><span class="n">g</span> <span class="o">-</span><span class="n">cp</span> <span class="n">path</span><span class="o">/</span><span class="n">to</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">java</span> - </code></pre> - <p>and then we can look at the bytecode using javap</p> - <pre class="highlight java"><code> - <span class="n">javap</span> <span class="o">-</span><span class="n">c</span> <span class="o">-</span><span class="n">classpath</span> <span class="n">path</span><span class="o">/</span><span class="n">to</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span><span class="o">;.</span> <span class="n">Squarer</span> - </code></pre> - <p>Compiled from “Squarer.java”</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span 
class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">Kernel</span> - <span class="nl">SourceFile:</span> <span class="s">"Squarer.java"</span> - <span class="n">minor</span> <span class="nl">version:</span> <span class="mi">0</span> - <span class="n">major</span> <span class="nl">version:</span> <span class="mi">50</span> - <span class="n">Constant</span> <span class="nl">pool:</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">2</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">18</span><span class="o">;</span> <span class="c1">// Squarer.getGlobalId:(I)I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">3</span> <span class="o">=</span> <span class="n">Field</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">19</span><span class="o">;</span> <span class="c1">// Squarer.out:[I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">4</span> <span class="o">=</span> <span class="n">Field</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">20</span><span class="o">;</span> <span class="c1">// Squarer.in:[I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">5</span> <span class="o">=</span> <span 
class="kd">class</span> <span class="err">#21;</span> <span class="err">//</span> <span class="nc">Squarer</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">7</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">in</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">8</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">[</span><span class="n">I</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">9</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">out</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">11</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">()</span><span class="n">V</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">12</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">Code</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">13</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">LineNumberTable</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">14</span> <span 
class="o">=</span> <span class="n">Asciz</span> <span class="n">run</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">15</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">SourceFile</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">16</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">java</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">18</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">23</span><span class="o">:</span><span class="err">#</span><span class="mi">24</span><span class="o">;</span><span class="c1">// getGlobalId:(I)I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">19</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">9</span><span class="o">:</span><span class="err">#</span><span class="mi">8</span><span class="o">;</span><span class="c1">// out:[I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">20</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">7</span><span class="o">:</span><span class="err">#</span><span class="mi">8</span><span class="o">;</span><span class="c1">// in:[I</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">21</span> <span class="o">=</span> <span 
class="n">Asciz</span> <span class="n">Squarer</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">23</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">getGlobalId</span><span class="o">;</span> - <span class="kd">const</span> <span class="err">#</span><span class="mi">24</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span><span class="o">;</span> - - <span class="o">{</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">;</span> - - <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">;</span> - - <span class="kd">public</span> <span class="nf">Squarer</span><span class="o">();</span> - <span class="nl">Code:</span> - <span class="n">Stack</span><span class="o">=</span><span class="mi">1</span><span class="o">,</span> <span class="n">Locals</span><span class="o">=</span><span class="mi">1</span><span class="o">,</span> <span class="n">Args_size</span><span class="o">=</span><span class="mi">1</span> - <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> - <span class="mi">1</span><span class="o">:</span> <span class="n">invokespecial</span> <span class="err">#</span><span class="mi">1</span><span class="o">;</span> <span class="c1">//Method com/amd/aparapi/Kernel."<init>":()V</span> - <span class="mi">4</span><span class="o">:</span> <span class="k">return</span> - - - <span class="kd">public</span> <span 
class="kt">void</span> <span class="nf">run</span><span class="o">();</span> - <span class="nl">Code:</span> - <span class="n">Stack</span><span class="o">=</span><span class="mi">5</span><span class="o">,</span> <span class="n">Locals</span><span class="o">=</span><span class="mi">2</span><span class="o">,</span> <span class="n">Args_size</span><span class="o">=</span><span class="mi">1</span> - <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> - <span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> - <span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="c1">//Method getGlobalId:(I)I</span> - <span class="mi">5</span><span class="o">:</span> <span class="n">istore_1</span> - <span class="mi">6</span><span class="o">:</span> <span class="n">aload_0</span> - <span class="mi">7</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">3</span><span class="o">;</span> <span class="c1">//Field out:[I</span> - <span class="mi">10</span><span class="o">:</span> <span class="n">iload_1</span> - <span class="mi">11</span><span class="o">:</span> <span class="n">aload_0</span> - <span class="mi">12</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">4</span><span class="o">;</span> <span class="c1">//Field in:[I</span> - <span class="mi">15</span><span class="o">:</span> <span class="n">iload_1</span> - <span class="mi">16</span><span class="o">:</span> <span class="n">iaload</span> - <span class="mi">17</span><span class="o">:</span> <span class="n">aload_0</span> - <span class="mi">18</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">4</span><span class="o">;</span> <span class="c1">//Field in:[I</span> - <span class="mi">21</span><span 
class="o">:</span> <span class="n">iload_1</span> - <span class="mi">22</span><span class="o">:</span> <span class="n">iaload</span> - <span class="mi">23</span><span class="o">:</span> <span class="n">imul</span> - <span class="mi">24</span><span class="o">:</span> <span class="n">iastore</span> - <span class="mi">25</span><span class="o">:</span> <span class="k">return</span> - <span class="o">}</span> - </code></pre> - <p>Here we see constant pool of the class and the disassembled bytecode of the default constructor <code>Squarer()</code> and the <code>Squarer.run()</code> method.</p> - - <p>The constant pool is a table of constant values that can be accessed from the bytecode of any methods from within this class. Some of the constants are String literals defined within the source (or literals used to name classes, fields, methods, variables or signatures), other slots represent Classes, Methods, Fields or Type signatures. These later constant pool entries cross-reference other constant pool entries to describe higher level artifact.</p> - - <p>For example constant pool entry #1 is</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - </code></pre> - <p>So entry #1 defines a method. The class containing the method is defined in constant pool entry #6. 
So lets look at constant pool entry #6.</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - </code></pre> - <p>At constant pool entry #6 we find a class definition which refers to entry #22</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span 
class="n">Kernel</span><span class="o">;</span> - </code></pre> - <p>Which just contains the String (Ascii) name of the class.</p> - - <p>Looking back at entry #1 again, we note that the Method also references entry #17 which contains a NameAndType entry for determining the method name and the signature.</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - - - <span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> - </code></pre> - <p>Entry #17’s “NameAndType” references #10 for the method name.</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span 
class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> - </code></pre> - <p>And then references #11 to get the signature.</p> - <pre class="highlight java"><code> - <span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// 
com/amd/aparapi/Kernel."<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">11</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">()</span><span class="n">V</span><span class="o">;</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> - - <span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> - </code></pre> - <p>So from constant pool #1 we ended up using slots 1,6,10,11,17 and 22 to fully resolve the method.</p> - - <p>This looks like a lot of work, however by breaking method and field references up like this, allows the various slots to be reused by other field/method descriptions.</p> - - <p>So when we see disassembled bytecode which references a constantpool slot the actual slot # (2 in the example below) will appear after the 
bytecode for invokevirtual.</p> - <pre class="highlight java"><code> - <span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="n">Method</span> <span class="nl">getGlobalId:</span><span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span> - </code></pre> - <p>Bytecode is basically able to access three things</p> - - <ol> - <li>Constant pool entries</li> - <li>Variable slots</li> - <li>Stack operands</li> - </ol> - - <p>Instructions are able to pop operands from the stack, push operands to the stack, load values from variable slots (to the stack), store values (from the stack) to variable slots, store values from accessed fields (to the stack) and call methods (popping args from the stack).</p> - - <p>Some instructions can only handle specific types (int, float, double, and object instances - arrays are special forms of objects) and usually the first character of the instruction helps determine which type the instruction acts upon. So imul would be a multiply instruction that operates on integers, fmul would multiply two floats, dmul for doubles. 
Instructions that begin with ‘a’ operate on object instances.</p> - - <p>So let’s look at the first instruction.</p> - <pre class="highlight java"><code> - <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> - </code></pre> - <p>This instruction loads an object (a is the first character) from variable slot 0 (we’ll come back to the variable slots in a moment) and pushes it on the stack.</p> - - <p>Variables are held in ‘slots’ that are reserved at compile time.</p> - - <p>Consider this static method.</p> - <pre class="highlight java"><code> - <span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">value</span><span class="o">){</span> - <span class="n">value</span> <span class="o">+=</span> <span class="n">value</span><span class="o">;</span> - <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>This method requires one variable slot. 
At any one time there is only one variable that is live; it just happens to be an argument to the method.</p> - - <p>The following method also contains one slot.</p> - <pre class="highlight java"><code> - <span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="mi">4</span><span class="o">;</span> - <span class="n">value</span> <span class="o">+=</span> <span class="n">value</span><span class="o">;</span> - <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Here we need two slots</p> - <pre class="highlight java"><code> - <span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">arg</span><span class="o">){</span> - <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="n">arg</span><span class="o">*</span><span class="n">arg</span><span class="o">;</span> - <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Surprisingly, the following also only requires two slots.</p> - <pre class="highlight plaintext"><code>static int squareMe(int arg){ - { - int temp = arg*arg; - } - int value=arg*arg; - return(value); - } - </code></pre> - <p>Note that in the above example the temp variable loses scope before the local variable value is used. So only two slots are required. 
Both temp and value can share a slot.</p> - - <p>If we have an instance method, we always require one extra slot (always slot 0) for the <code>this</code> reference.</p> - - <p>So</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">arg</span><span class="o">){</span> - <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="n">arg</span><span class="o">*</span><span class="n">arg</span><span class="o">;</span> - <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Requires three slots.</p> - - <p>Anyway, back to our bytecode</p> - <pre class="highlight java"><code> - <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> - </code></pre> - <p>This loads the object instance in slot 0 (this) and pushes it on the stack.</p> - - <p>Next we have</p> - <pre class="highlight java"><code> - <span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> - </code></pre> - <p>Which pushes the int constant 0 on the stack. So the stack contains {this,0}</p> - - <p>Next we have</p> - <pre class="highlight java"><code> - <span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="c1">//Method getGlobalId:(I)I</span> - </code></pre> - <p>This is the bytecode for calling a method. 
Basically the instruction itself references the constant pool (we’ll come back to this ;) ) and pulls the method description in <code>constantPool2</code> which happens to be the description for a method called <code>getGlobalId()</code> which takes an integer and returns an <code>int</code>.</p> - - <p>So the VM will pop the top value <code>(int - const 0)</code> as the method arg, and then will pop an object reference (this!) and will call the method <code>this.getGlobalId(0)</code> and will push the result (an int) back on the stack.</p> - - <p>So our stack which contains <code>{this,0}</code> now contains the result of this.getGlobalId(0), lets assume it is <code>{0}</code>. We describe this invoke instruction as consuming two operands from the stack and producing one.</p> - - <p>Before we start executing our stack is empty {}, the slots are initialized with 'this’ (if an instance method) and any arguments passed to the method.</p> - <pre class="highlight java"><code> - <span class="mi">0</span> <span class="mi">1</span> - <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={}</span> - - <span class="mi">0</span> <span class="mi">1</span> - <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="k">this</span><span class="o">}</span> - <span class="mi">0</span> <span class="mi">1</span> - <span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span 
class="o">={</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span><span class="o">}</span> - <span class="mi">0</span> <span class="mi">1</span> - <span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="n">Method</span> <span class="nl">getGlobalId:</span><span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="n">result</span> <span class="n">of</span> <span class="k">this</span><span class="o">.</span><span class="na">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> <span class="n">lets</span> <span class="n">say</span> <span class="mi">0</span><span class="o">}</span> - - <span class="mi">5</span><span class="o">:</span> <span class="n">istore_1</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={}</span> - - <span class="mi">6</span><span class="o">:</span> <span class="n">aload_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="k">this</span><span class="o">}</span> - - <span class="mi">7</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">3</span><span class="o">;</span> <span class="c1">//Field out:[I</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div 
class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJe
BsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Converting Java to OpenCL +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Converting Java to OpenCL</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How Aparapi converts bytecode to OpenCL</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<h2>Introduction</h2> + +<p>This page acts as a quick summary for <a href="/documentation/ByteCode2OpenCL.pdf" rel="nofollow">the more detailed PDF</a> that was originally written by AMD.</p> + +<p>One of the unique Aparapi features is its ability to convert Java bytecode to OpenCL automatically.</p> + +<p>In this page we will try to describe the process used to perform this conversion. If you are unfamiliar with bytecode consider visiting this page WhatIsBytecode.</p> + +<p>The command</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">javac</span> <span class="n">Source</span><span class="o">.</span><span class="na">java</span> +</code></pre></div> +<p>Will compile the Java source file Source.java to Source.class.</p> + +<p>The classfile format is well documented here and we will not go into too much detail here; however, it should be known that Aparapi must parse the classfile of each Kernel to extract the bytecode for the <code>Kernel.run()</code> and any method reachable from <code>Kernel.run()</code>.</p> + +<p>Let’s start with a simple Kernel.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kn">import</span> <span class="nn">com.aparapi.Kernel</span><span class="o">;</span> + +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">;</span> + <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">;</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> + <span 
class="n">out</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>We will compile this</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">javac</span> <span class="o">-</span><span class="n">g</span> <span class="o">-</span><span class="n">cp</span> <span class="n">path</span><span class="o">/</span><span class="n">to</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">java</span> +</code></pre></div> +<p>and then we can look at the bytecode using javap</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">javap</span> <span class="o">-</span><span class="n">c</span> <span class="o">-</span><span class="n">classpath</span> <span class="n">path</span><span class="o">/</span><span class="n">to</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span><span class="o">;.</span> <span class="n">Squarer</span> +</code></pre></div> +<p>Compiled from “Squarer.java”</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">Kernel</span> + <span class="nl">SourceFile:</span> <span class="s">"Squarer.java"</span> + <span 
class="n">minor</span> <span class="nl">version:</span> <span class="mi">0</span> + <span class="n">major</span> <span class="nl">version:</span> <span class="mi">50</span> + <span class="n">Constant</span> <span class="nl">pool:</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">2</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">18</span><span class="o">;</span> <span class="c1">// Squarer.getGlobalId:(I)I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">3</span> <span class="o">=</span> <span class="n">Field</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">19</span><span class="o">;</span> <span class="c1">// Squarer.out:[I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">4</span> <span class="o">=</span> <span class="n">Field</span> <span class="err">#</span><span class="mi">5</span><span class="o">.</span><span class="err">#</span><span class="mi">20</span><span class="o">;</span> <span class="c1">// Squarer.in:[I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">5</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#21;</span> <span class="err">//</span> <span class="nc">Squarer</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span 
class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">7</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">in</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">8</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">[</span><span class="n">I</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">9</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">out</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">11</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">()</span><span class="n">V</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">12</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">Code</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">13</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">LineNumberTable</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">14</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">run</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">15</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">SourceFile</span><span class="o">;</span> 
+<span class="kd">const</span> <span class="err">#</span><span class="mi">16</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">java</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">18</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">23</span><span class="o">:</span><span class="err">#</span><span class="mi">24</span><span class="o">;</span><span class="c1">// getGlobalId:(I)I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">19</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">9</span><span class="o">:</span><span class="err">#</span><span class="mi">8</span><span class="o">;</span><span class="c1">// out:[I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">20</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">7</span><span class="o">:</span><span class="err">#</span><span class="mi">8</span><span class="o">;</span><span class="c1">// in:[I</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">21</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">Squarer</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span 
class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">23</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">getGlobalId</span><span class="o">;</span> +<span class="kd">const</span> <span class="err">#</span><span class="mi">24</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span><span class="o">;</span> + +<span class="o">{</span> +<span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">;</span> + +<span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">;</span> + +<span class="kd">public</span> <span class="nf">Squarer</span><span class="o">();</span> + <span class="nl">Code:</span> + <span class="n">Stack</span><span class="o">=</span><span class="mi">1</span><span class="o">,</span> <span class="n">Locals</span><span class="o">=</span><span class="mi">1</span><span class="o">,</span> <span class="n">Args_size</span><span class="o">=</span><span class="mi">1</span> + <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> + <span class="mi">1</span><span class="o">:</span> <span class="n">invokespecial</span> <span class="err">#</span><span class="mi">1</span><span class="o">;</span> <span class="c1">//Method com/amd/aparapi/Kernel."<init>":()V</span> + <span class="mi">4</span><span class="o">:</span> <span class="k">return</span> + + +<span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">();</span> + <span class="nl">Code:</span> + <span class="n">Stack</span><span class="o">=</span><span class="mi">5</span><span class="o">,</span> <span class="n">Locals</span><span class="o">=</span><span class="mi">2</span><span class="o">,</span> <span 
class="n">Args_size</span><span class="o">=</span><span class="mi">1</span> + <span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> + <span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> + <span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="c1">//Method getGlobalId:(I)I</span> + <span class="mi">5</span><span class="o">:</span> <span class="n">istore_1</span> + <span class="mi">6</span><span class="o">:</span> <span class="n">aload_0</span> + <span class="mi">7</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">3</span><span class="o">;</span> <span class="c1">//Field out:[I</span> + <span class="mi">10</span><span class="o">:</span> <span class="n">iload_1</span> + <span class="mi">11</span><span class="o">:</span> <span class="n">aload_0</span> + <span class="mi">12</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">4</span><span class="o">;</span> <span class="c1">//Field in:[I</span> + <span class="mi">15</span><span class="o">:</span> <span class="n">iload_1</span> + <span class="mi">16</span><span class="o">:</span> <span class="n">iaload</span> + <span class="mi">17</span><span class="o">:</span> <span class="n">aload_0</span> + <span class="mi">18</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">4</span><span class="o">;</span> <span class="c1">//Field in:[I</span> + <span class="mi">21</span><span class="o">:</span> <span class="n">iload_1</span> + <span class="mi">22</span><span class="o">:</span> <span class="n">iaload</span> + <span class="mi">23</span><span class="o">:</span> <span class="n">imul</span> + <span class="mi">24</span><span class="o">:</span> <span class="n">iastore</span> + <span 
class="mi">25</span><span class="o">:</span> <span class="k">return</span> +<span class="o">}</span> +</code></pre></div> +<p>Here we see the constant pool of the class and the disassembled bytecode of the default constructor <code>Squarer()</code> and the <code>Squarer.run()</code> method.</p> + +<p>The constant pool is a table of constant values that can be accessed from the bytecode of any methods from within this class. Some of the constants are String literals defined within the source (or literals used to name classes, fields, methods, variables or signatures), other slots represent Classes, Methods, Fields or Type signatures. These latter constant pool entries cross-reference other constant pool entries to describe higher-level artifacts.</p> + +<p>For example constant pool entry #1 is</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."&lt;init&gt;":()V</span> +</code></pre></div> +<p>So entry #1 defines a method. The class containing the method is defined in constant pool entry #6. 
So let’s look at constant pool entry #6.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."&lt;init&gt;":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> +</code></pre></div> +<p>At constant pool entry #6 we find a class definition which refers to entry #22</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."&lt;init&gt;":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span 
class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> +</code></pre></div> +<p>Which just contains the String (Ascii) name of the class.</p> + +<p>Looking back at entry #1 again, we note that the Method also references entry #17 which contains a NameAndType entry for determining the method name and the signature.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> + + +<span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> +</code></pre></div> +<p>Entry #17’s “NameAndType” references #10 for the method name.</p> +<div class="highlight"><pre class="highlight java"><code> 
+<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> +</code></pre></div> +<p>And then references #11 to get the signature.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">const</span> <span class="err">#</span><span class="mi">1</span> <span class="o">=</span> <span class="n">Method</span> <span class="err">#</span><span class="mi">6</span><span class="o">.</span><span 
class="err">#</span><span class="mi">17</span><span class="o">;</span> <span class="c1">// com/amd/aparapi/Kernel."<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">6</span> <span class="o">=</span> <span class="kd">class</span> <span class="err">#22;</span> <span class="err">//</span> <span class="nc">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">10</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o"><</span><span class="n">init</span><span class="o">>;</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">11</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="o">()</span><span class="n">V</span><span class="o">;</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">17</span> <span class="o">=</span> <span class="n">NameAndType</span> <span class="err">#</span><span class="mi">10</span><span class="o">:</span><span class="err">#</span><span class="mi">11</span><span class="o">;</span><span class="c1">// "<init>":()V</span> + +<span class="kd">const</span> <span class="err">#</span><span class="mi">22</span> <span class="o">=</span> <span class="n">Asciz</span> <span class="n">com</span><span class="o">/</span><span class="n">amd</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">Kernel</span><span class="o">;</span> +</code></pre></div> +<p>So from constant pool #1 we ended up using slots 1,6,10,11,17 and 22 to fully resolve the method.</p> + +<p>This looks like a lot of work, however by breaking method and field references up like this, allows the various slots to be reused by other field/method descriptions.</p> + +<p>So when we see disassembled bytecode which references a 
constant pool slot, the actual slot # (2 in the example below) will appear after the bytecode for invokevirtual.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="n">Method</span> <span class="nl">getGlobalId:</span><span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span> +</code></pre></div> +<p>Bytecode is basically able to access three things:</p> + +<ol> +<li>Constant pool entries</li> +<li>Variable slots</li> +<li>Stack operands</li> +</ol> + +<p>Instructions are able to pop operands from the stack, push operands to the stack, load values from variable slots (to the stack), store values (from the stack) to variable slots, load values from accessed fields (to the stack) and call methods (popping args from the stack).</p> + +<p>Some instructions can only handle specific types (int, float, double, and object instances - arrays are special forms of objects) and usually the first character of the instruction helps determine which type the instruction acts upon. So imul would be a multiply instruction that operates on integers, fmul would multiply two floats, dmul for doubles. 
Instructions that begin with ‘a’ operate on object instances.</p> + +<p>So let’s look at the first instruction.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> +</code></pre></div> +<p>This instruction loads an object (a is the first character) from variable slot 0 (we’ll come back to the variable slots in a moment) and pushes it on the stack.</p> + +<p>Variables are held in ‘slots’ that are reserved at compile time.</p> + +<p>Consider this static method.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">value</span><span class="o">){</span> + <span class="n">value</span> <span class="o">+=</span> <span class="n">value</span><span class="o">;</span> + <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>This method requires one variable slot. 
At any one time there is only one variable that is live, it just happens to be an argument to the method.</p> + +<p>The following method also requires one slot.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="mi">4</span><span class="o">;</span> + <span class="n">value</span> <span class="o">+=</span> <span class="n">value</span><span class="o">;</span> + <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Here we need two slots</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">static</span> <span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">arg</span><span class="o">){</span> + <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="n">arg</span><span class="o">*</span><span class="n">arg</span><span class="o">;</span> + <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Surprisingly the following also only requires two slots.</p> +<div class="highlight"><pre class="highlight plaintext"><code>static int squareMe(int arg){ + { + int temp = arg*arg; + } + int value=arg*arg; + return(value); +} +</code></pre></div> +<p>Note that in the above example the temp variable goes out of scope before the local variable value is used. So only two slots are required. 
Both temp and value can share a slot.</p> + +<p>If we have an instance method we always require one extra slot (always slot 0) for the this reference.</p> + +<p>So</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="nf">squareMe</span><span class="o">(</span><span class="kt">int</span> <span class="n">arg</span><span class="o">){</span> + <span class="kt">int</span> <span class="n">value</span><span class="o">=</span><span class="n">arg</span><span class="o">*</span><span class="n">arg</span><span class="o">;</span> + <span class="k">return</span><span class="o">(</span><span class="n">value</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Requires three slots.</p> + +<p>Anyway back to our bytecode</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> +</code></pre></div> +<p>This loads the object instance in slot 0 (this) and pushes it on the stack.</p> + +<p>Next we have</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> +</code></pre></div> +<p>Which pushes the int constant 0 on the stack. So the stack contains {this,0}</p> + +<p>Next we have</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="c1">//Method getGlobalId:(I)I</span> +</code></pre></div> +<p>This is the bytecode for calling a method. 
Basically the instruction itself references the constant pool (we’ll come back to this ;) ) and pulls the method description in <code>constantPool2</code> which happens to be the description for a method called <code>getGlobalId()</code> which takes an integer and returns an <code>int</code>.</p> + +<p>So the VM will pop the top value <code>(int - const 0)</code> as the method arg, and then will pop an object reference (this!) and will call the method <code>this.getGlobalId(0)</code> and will push the result (an int) back on the stack.</p> + +<p>So our stack which contains <code>{this,0}</code> now contains the result of this.getGlobalId(0), lets assume it is <code>{0}</code>. We describe this invoke instruction as consuming two operands from the stack and producing one.</p> + +<p>Before we start executing our stack is empty {}, the slots are initialized with ‘this’ (if an instance method) and any arguments passed to the method.</p> +<div class="highlight"><pre class="highlight java"><code> + <span class="mi">0</span> <span class="mi">1</span> + <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={}</span> + + <span class="mi">0</span> <span class="mi">1</span> +<span class="mi">0</span><span class="o">:</span> <span class="n">aload_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="k">this</span><span class="o">}</span> + <span class="mi">0</span> <span class="mi">1</span> +<span class="mi">1</span><span class="o">:</span> <span class="n">iconst_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span 
class="o">={</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span><span class="o">}</span> + <span class="mi">0</span> <span class="mi">1</span> +<span class="mi">2</span><span class="o">:</span> <span class="n">invokevirtual</span> <span class="err">#</span><span class="mi">2</span><span class="o">;</span> <span class="n">Method</span> <span class="nl">getGlobalId:</span><span class="o">(</span><span class="n">I</span><span class="o">)</span><span class="n">I</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="o">?</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="n">result</span> <span class="n">of</span> <span class="k">this</span><span class="o">.</span><span class="na">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span> <span class="n">lets</span> <span class="n">say</span> <span class="mi">0</span><span class="o">}</span> + +<span class="mi">5</span><span class="o">:</span> <span class="n">istore_1</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={}</span> + +<span class="mi">6</span><span class="o">:</span> <span class="n">aload_0</span> <span class="n">slots</span><span class="o">=[</span><span class="k">this</span><span class="o">,</span> <span class="mi">0</span> <span class="o">]</span> <span class="n">stack</span><span class="o">={</span><span class="k">this</span><span class="o">}</span> + +<span class="mi">7</span><span class="o">:</span> <span class="n">getfield</span> <span class="err">#</span><span class="mi">3</span><span class="o">;</span> <span class="c1">//Field out:[I</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div 
class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/
c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/emulating-multiple-entrypoints.html b/documentation/emulating-multiple-entrypoints.html index 53b53cebbbef8d53f8b4727a5631bb6d069e9574..6393c59e1dde4b0bdc920a03f47d6dedff3e2bef 100644 --- a/documentation/emulating-multiple-entrypoints.html +++ b/documentation/emulating-multiple-entrypoints.html @@ -1,337 +1,338 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Emulating Multiple Entrypoints - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> 
- <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA 
Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Emulating Multiple Entrypoints</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to emulate multiple entrypoints using existing Aparapi APIs</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Emulating Multiple Entrypoints Using Existing Aparapi APIs</h2> - - <p>Until we have support for multiple entrypoints in Aparapi, there are some tricks for emulating this feature.</p> - - <p>Suppose we wanted to create a general VectorMath kernel which might expose unary square and square-root methods as well as binary addition and subtraction functionality. With our current API limitations we can’t easily do this, but we can approximate having separate methods by passing a separate argument to dictate the ‘function’ that we wish to perform.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span 
class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span class="mi">3</span><span class="o">;</span> - <span class="c1">// other functions</span> - <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span 
class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>To use this for adding two vectors and then take the sqrt of the result we would use something like….</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">SIZE</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> - <span class="n">VectorKernel</span> <span class="n">vk</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorKernel</span><span class="o">();</span> - <span class="n">vk</span><span class="o">.</span><span class="na">lhsOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span 
class="o">[</span><span class="n">SIZE</span><span class="o">];</span> - <span class="n">vk</span><span class="o">.</span><span class="na">rhsOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> - <span class="n">vk</span><span class="o">.</span><span class="na">unaryOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> - <span class="n">vk</span><span class="o">.</span><span class="na">result</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> - - <span class="c1">// fill lhsOperand ommitted</span> - <span class="c1">// fill rhsOperand ommitted</span> - <span class="n">vk</span><span class="o">.</span><span class="na">function</span> <span class="o">=</span> <span class="n">VectorKernel</span><span class="o">.</span><span class="na">FUNC_ADD</span><span class="o">;</span> - <span class="n">vk</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span class="o">(</span><span class="n">vk</span><span class="o">.</span><span class="na">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">vk</span><span class="o">.</span><span class="na">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">SIZE</span><span class="o">);</span> - <span class="n">vk</span><span class="o">.</span><span class="na">function</span> <span class="o">=</span> <span class="n">VectorKernel</span><span class="o">.</span><span class="na">FUNC_SQRT</span><span 
class="o">;</span> - <span class="n">vk</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>This approach is fairly common and I have used it successfully to perform various pipeline stages for calculating FFTs, for example. Whilst this is functional it is not a great solution. First the API is clumsy. We have to mutate the state of the kernel instance and then re-arrange the arrays manually to chain math operations. We could of course hide all of this behind helper methods. One could imagine for example an implementation which exposes helper methods such as <code>add(lhs, rhs)</code> or <code>sqrt()</code>, which hide all the nasty stuff.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> - <span class="kd">final</span> 
<span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span class="mi">3</span><span class="o">;</span> - <span class="c1">// other functions</span> - <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span 
class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> - <span class="o">}</span> - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">lhsOperand</span> <span class="o">=</span> <span class="n">lhs</span><span class="o">;</span> - <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">execute</span><span class="o">(</span><span class="n">lhs</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> 
<span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">lhsOperand</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">execute</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">.</span><span class="na">legth</span><span 
class="o">());</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> - <span class="n">unaryOperand</span> <span class="o">=</span> <span class="n">unary</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> - <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">,</span> <span class="n">unary</span><span 
class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">array</span><span class="o">.</span><span class="na">copy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(){</span> - <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">);</span> - <span class="o">}</span> - - <span class="o">}</span> - - <span class="n">VectorKernel</span> <span class="n">vk</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorKernel</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> - <span class="n">vk</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">copyLhs</span><span class="o">,</span> <span class="n">copyRhs</span><span class="o">);</span> <span class="c1">// copies args to lhs and rhs operands</span> - <span class="c1">// sets function type</span> - <span class="c1">// and executes kernel</span> - <span class="n">vk</span><span class="o">.</span><span 
class="na">sqrt</span><span class="o">();</span> <span class="c1">// because we have no arg</span> - <span class="c1">// copies result to unary operand</span> - <span class="c1">// sets function type</span> - <span class="c1">// execute kernel</span> - </code></pre> - <p>However there is one more objection to this approach, namely that by default it forces unnecessary buffer copies.</p> - - <p>When the bytecode for the above <code>Kernel.run()</code> method is analyzed Aparapi finds bytecode reading from lhsOperand, rhsOperand and unaryOperand arrays/buffers. Obviously at this bytecode analysis stage we can’t predict which ‘function type’ will be used, so on every execution (Kernel.run()) Aparapi must copy all three buffers to the GPU. For binary operations this is one buffer copy wasted (the unaryOperand); for the unary operations we copy two buffers unnecessarily (lhsOperand and rhsOperand). We can of course use explicit buffer management to help us reduce these costs. Ideally we add this to our helper methods.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span 
class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span class="mi">3</span><span class="o">;</span> - <span class="c1">// other functions</span> - <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - 
<span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> - <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> - <span class="o">}</span> - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">lhsOperand</span> <span class="o">=</span> <span class="n">lhs</span><span class="o">;</span> - <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">put</span><span class="o">(</span><span 
class="n">lhsOperand</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">rhsOperand</span><span class="o">);</span> - <span class="n">execute</span><span class="o">(</span><span class="n">lhs</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> - <span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span 
class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">lhsOperand</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">put</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">rhsOperand</span><span class="o">);</span> - <span class="n">execute</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">.</span><span class="na">legth</span><span class="o">());</span> - <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> - <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span 
class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> - <span class="n">unaryOperand</span> <span class="o">=</span> <span class="n">unary</span><span class="o">;</span> - <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">put</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">);</span> - <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> - <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">,</span> <span class="n">unary</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">array</span><span class="o">.</span><span class="na">copy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">function</span><span 
class="o">=</span><span class="n">operator</span><span class="o">;</span> - <span class="n">put</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">);</span> - <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> - <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> - - <span class="o">}</span> - - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(){</span> - <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">);</span> - <span class="o">}</span> - - <span class="o">}</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Emulating Multiple Entrypoints +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Emulating Multiple Entrypoints</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to emulate multiple entrypoints using existing Aparapi APIs</h4> +</div> + +</div> + 
+</div> +<div class='container'> +<h2>Emulating Multiple Entrypoints Using Existing Aparapi APIs</h2> + +<p>Until we have support for multiple entrypoints in Aparapi, there are some tricks for emulating this feature.</p> + +<p>Suppose we wanted to create a general VectorMath kernel which might expose unary square and square root methods and binary addition and subtraction functionality. With our current API limitations we can’t easily do this, but we can approximate having separate methods by passing a separate arg to dictate the ‘function’ that we wish to perform.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span
class="mi">3</span><span class="o">;</span> + <span class="c1">// other functions</span> + <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span 
class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>To use this for adding two vectors and then take the sqrt of the result we would use something like….</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">SIZE</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> +<span class="n">VectorKernel</span> <span class="n">vk</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorKernel</span><span class="o">();</span> +<span class="n">vk</span><span class="o">.</span><span class="na">lhsOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> +<span class="n">vk</span><span class="o">.</span><span class="na">rhsOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> +<span 
class="n">vk</span><span class="o">.</span><span class="na">unaryOperand</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> +<span class="n">vk</span><span class="o">.</span><span class="na">result</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">SIZE</span><span class="o">];</span> + +<span class="c1">// fill lhsOperand omitted</span> +<span class="c1">// fill rhsOperand omitted</span> +<span class="n">vk</span><span class="o">.</span><span class="na">function</span> <span class="o">=</span> <span class="n">VectorKernel</span><span class="o">.</span><span class="na">FUNC_ADD</span><span class="o">;</span> +<span class="n">vk</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span class="o">(</span><span class="n">vk</span><span class="o">.</span><span class="na">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">vk</span><span class="o">.</span><span class="na">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">SIZE</span><span class="o">);</span> +<span class="n">vk</span><span class="o">.</span><span class="na">function</span> <span class="o">=</span> <span class="n">VectorKernel</span><span class="o">.</span><span class="na">FUNC_SQRT</span><span class="o">;</span> +<span class="n">vk</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>This approach is fairly common and I have used it successfully to perform various pipeline stages for calculating FFTs, for example.
Whilst this is functional, it is not a great solution. First, the API is clumsy. We have to mutate the state of the kernel instance and then re-arrange the arrays manually to chain math operations. We could of course hide all of this behind helper methods. One could imagine for example an implementation which exposes helper methods such as <code>add(lhs, rhs)</code> or <code>sqrt()</code> which hide all the nasty stuff.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span class="mi">3</span><span class="o">;</span> + <span class="c1">// other functions</span> + <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> + <span
class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span 
class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> + <span class="o">}</span> + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">lhsOperand</span> <span class="o">=</span> <span class="n">lhs</span><span class="o">;</span> + <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">execute</span><span class="o">(</span><span class="n">lhs</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> + <span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span 
class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">lhsOperand</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">execute</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">.</span><span class="na">legth</span><span class="o">());</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span 
class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> + <span class="n">unaryOperand</span> <span class="o">=</span> <span class="n">unary</span><span class="o">;</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> + <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">,</span> <span class="n">unary</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span 
class="na">array</span><span class="o">.</span><span class="na">copy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(){</span> + <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">);</span> + <span class="o">}</span> + +<span class="o">}</span> + +<span class="n">VectorKernel</span> <span class="n">vk</span> <span class="o">=</span> <span class="k">new</span> <span class="n">VectorKernel</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> +<span class="n">vk</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">copyLhs</span><span class="o">,</span> <span class="n">copyRhs</span><span class="o">);</span> <span class="c1">// copies args to lhs and rhs operands</span> + <span class="c1">// sets function type</span> + <span class="c1">// and executes kernel</span> +<span class="n">vk</span><span class="o">.</span><span class="na">sqrt</span><span class="o">();</span> <span class="c1">// because we have no arg</span> + <span class="c1">// copies result to unary operand</span> + <span class="c1">// sets function type</span> + <span class="c1">// execute kernel</span> +</code></pre></div> +<p>However there is one more objection to this 
approach, namely that by default it will force unnecessary buffer copies.</p> + +<p>When the bytecode for the above <code>Kernel.run()</code> method is analyzed, Aparapi finds bytecode reading from lhsOperand, rhsOperand and unaryOperand arrays/buffers. Obviously at this bytecode analysis stage we can’t predict which ‘function type’ will be used, so on every execution (Kernel.run()) Aparapi must copy all three buffers to the GPU. For binary operations this is one buffer copy wasted (the unaryOperand); for the unary operations we copy two buffers unnecessarily (lhsOperand and rhsOperand). We can of course use explicit buffer management to help us reduce these costs. Ideally we add this to our helper methods.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">lhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">rhsOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">unaryOperand</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">result</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_ADD</span> <span class="o">=</span><span class="mi">0</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SUB</span> <span class="o">=</span><span class="mi">1</span><span class="o">;</span> + <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQR</span> <span class="o">=</span><span class="mi">2</span><span class="o">;</span> + <span
class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">FUNC_SQRT</span> <span class="o">=</span><span class="mi">3</span><span class="o">;</span> + <span class="c1">// other functions</span> + <span class="kt">int</span> <span class="n">function</span><span class="o">;</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">gid</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]+</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SUB</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">lhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]-</span><span class="n">rhsOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_SQR</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span 
class="o">]=</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]*</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">];</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">(</span><span class="n">function</span><span class="o">==</span><span class="n">FUNC_ADD</span><span class="o">){</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]=</span><span class="n">sqrt</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">[</span><span class="n">gid</span><span class="o">]);</span> + <span class="o">}</span><span class="k">else</span> <span class="k">if</span> <span class="o">....</span> + <span class="o">}</span> + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">lhsOperand</span> <span class="o">=</span> <span class="n">lhs</span><span class="o">;</span> + <span class="n">rhsOperand</span> <span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">put</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">rhsOperand</span><span class="o">);</span> + <span class="n">execute</span><span class="o">(</span><span class="n">lhs</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> + 
<span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> + <span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">lhs</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">lhs</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">binary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span class="na">arrayCopy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">lhsOperand</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="n">rhsOperand</span> 
<span class="o">=</span> <span class="n">rhs</span><span class="o">;</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">put</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">rhsOperand</span><span class="o">);</span> + <span class="n">execute</span><span class="o">(</span><span class="n">lhsOperand</span><span class="o">.</span><span class="na">legth</span><span class="o">());</span> + <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_ADD</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">rhs</span><span class="o">){</span> + <span class="n">binary</span><span class="o">(</span><span class="n">FUNC_SUB</span><span class="o">,</span> <span class="n">rhs</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> + <span class="n">unaryOperand</span> <span class="o">=</span> <span class="n">unary</span><span class="o">;</span> + <span 
class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">put</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">);</span> + <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span class="na">length</span><span class="o">());</span> + <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">unary</span><span class="o">){</span> + <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">,</span> <span class="n">unary</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kd">private</span> <span class="kt">void</span> <span class="nf">unary</span><span class="o">(</span><span class="kt">int</span> <span class="n">operator</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span class="na">array</span><span class="o">.</span><span class="na">copy</span><span class="o">(</span><span class="n">result</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">unaryOperand</span><span class="o">,</span> <span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="n">function</span><span class="o">=</span><span class="n">operator</span><span class="o">;</span> + <span class="n">put</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">);</span> + <span class="n">execute</span><span class="o">(</span><span class="n">unaryOperand</span><span class="o">.</span><span 
class="na">length</span><span class="o">());</span> + <span class="n">get</span><span class="o">(</span><span class="n">result</span><span class="o">);</span> + + <span class="o">}</span> + + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(){</span> + <span class="n">unary</span><span class="o">(</span><span class="n">FUNC_SQRT</span><span class="o">);</span> + <span class="o">}</span> + +<span class="o">}</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/explicit-buffer-handling.html b/documentation/explicit-buffer-handling.html index f8b2868b65942aac627592af4e181b310ee193a7..823be7696d92517ec462f59753f4673e92f3a5d4 100644 --- a/documentation/explicit-buffer-handling.html +++ b/documentation/explicit-buffer-handling.html @@ -1,337 +1,338 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Explicit Buffer Handling - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Explicit Buffer Handling</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to minimize buffer transfers.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>Aparapi is designed to shield the Java developer from dealing with the underlying movement of data between the OpenCL host and device. Aparapi can analyze a kernel’s <code>run()</code> method and run-reachable methods to determine which primitive arrays to transfer to the GPU prior to execution, and which arrays to transfer back when the GPU execution is complete.</p> - - <p>Generally this strategy is both clean and performant. 
Aparapi will attempt to just do the right thing.</p> - - <p>However, occasionally the following code pattern is seen.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">};</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>This is a common pattern which unfortunately exposes an issue with Aparapi’s normal buffer handling.</p> - - <p>Although Aparapi does analyze the byte code of the <code>Kernel.run()</code> method (and any method reachable from <code>Kernel.run()</code>) Aparapi has no visibility to the call site. In the above code there is no way for Aparapi to detect that hugeArray is not modified within the for loop body.
Unfortunately, Aparapi must default to being ‘safe’ and copy the contents of hugeArray backwards and forwards to the GPU device.</p> - - <p>Here we add comments to indicate where the unnecessary buffer transfers take place.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">};</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> - <span class="c1">// copy hugeArray to GPU</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="c1">// copy hugeArray back from the GPU</span> - <span class="o">}</span> - </code></pre> - <p>In reality hugeArray only needs to be copied to the GPU once (prior to the loop) and copied back once when the loop has terminated.</p> - - <p>Here we use comments to indicate the ‘optimal’ transfers.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span>
- <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">};</span> - <span class="c1">// Ideally transfer hugeArray to GPU here</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="o">}</span> - <span class="c1">// Ideally transfer hugeArray back from GPU here</span> - </code></pre> - <p>Consider another common pattern</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span 
class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>This is a common pattern in reduce stages of map-reduce type problems. Essentially the developer wants to keep executing a kernel until some condition is met. For example, this may be seen in bitonic sort implementations and various financial applications.</p> - - <p>From the code it can be seen that the kernel reads and writes the <code>hugeArray[]</code> array and uses the single item <code>done[]</code> array to indicate some form of convergence or completion.</p> - - <p>As we demonstrated above, by default Aparapi will transfer <code>done[]</code> and <code>hugeArray[]</code> to and from the GPU device each time <code>Kernel.execute(HUGE)</code> is executed.</p> - - <p>To demonstrate which buffers are being transferred, these copies are shown as comments in the following version of the code.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span
class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="c1">// Send done[] to GPU</span> - <span class="c1">// Send hugeArray[] to GPU</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="c1">// Fetch done[] from GPU</span> - <span class="c1">// Fetch hugeArray[] from GPU</span> - <span class="o">}</span> - </code></pre> - <p>Further analysis of the code reveals that <code>hugeArray[]</code> is not accessed by the loop containing the kernel execution, so Aparapi is performing 999 unnecessary transfers to the device and 999 unnecessary transfers back. 
Only two transfers of <code>hugeArray[]</code> are needed; one to move the initial data to the GPU and one to move it back after the loop terminates.</p> - - <p>The <code>done[]</code> array is accessed during each iteration (although never written to within the loop), so it does need to be transferred back for each return from Kernel.execute(), however, it only needs to be sent once.</p> - - <p>Clearly it is better to avoid unnecessary transfers, especially of large buffers like <code>hugeArray[]</code>.</p> - - <p>Aparapi exposes a feature which allows the developer to control these situations and explicitly manage transfers.</p> - - <p>To use this feature first the developer needs to 'turn on’ explicit mode, using the <code>kernel.setExplicit(true)</code> method. Then the developer can request buffer/array transfers using either <code>kernel.put()</code> or <code>kernel.get()</code>. <code>Kernel.put()</code> forces a transfer to the GPU device and Kernel.get() transfers data back.</p> - - <p>The following code illustrates the use of these new explicit buffer management APIs.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span 
class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> - <span class="o">}</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - </code></pre> - <p>Note that marking a kernel as explicit and failing to request the appropriate transfer is a programmer error.</p> - - <p>We deliberately made <code>Kernel.put(...)</code>, <code>Kernel.get(...)</code> and <code>Kernel.execute(range)</code> return an instance of the executing kernel to allow these calls to be chained.
Some may find this fluent style API more expressive.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> <span class="c1">// chained puts</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">).</span><span 
class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> <span class="c1">// chained execute and get</span> - <span class="o">}</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - </code></pre> - <p>An alternate approach for loops containing a single <code>kernel.execute(range)</code> call. - One variant of code which would normally suggest the use of Explicit Buffer Management can be handled differently. For cases where <code>Kernel.execute(range)</code> is the sole statement inside a loop and where the iteration count is known prior to the first iteration we offer an alternate (hopefully more elegant) way of minimizing buffer transfers.</p> - - <p>So for cases like:-</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">};</span> - - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o"><</span><span class="mi">1000</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>The developer
can request that Aparapi perform the outer loop rather than coding the loop. This is achieved explicitly by passing the iteration count as the second argument to <code>Kernel.execute(range, iterations)</code>.</p> - - <p>Now any form of code that looks like :-</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">passId</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">passId</span> <span class="o"><</span> <span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Can be replaced with</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">loopCount</span><span class="o">);</span> - </code></pre> - <p>Not only does this make the code more compact and avoid the use of explicit buffer management APIs, it allows Aparapi visibility to the complete loop so that Aparapi can minimize the number of transfers.
Aparapi will only transfer buffers to the GPU once and transfer them back once, resulting in improved performance.</p> - - <p>Sometimes kernel code using this loop-pattern needs to track the current iteration number as the code passes through the outer loop. Previously we would be forced to use explicit buffer management to allow the kernel to do this.</p> - - <p>The code for this would have looked something like</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">passId</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">0</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> - <span class="c1">// perform
some initialization!</span> - <span class="o">}</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - <span class="k">for</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]<</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]++){</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">passId</span><span class="o">).</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>In the current version of Aparapi we added <code>Kernel.getPassId()</code> to allow a Kernel to determine the current ‘pass’ through the outer loop without having to use explicit buffer management.</p> - - <p>So the previous code can now be written without any explicit buffer management APIs:-</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span 
class="o">[]</span> <span class="n">pass</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> - <span class="c1">// perform some initialization!</span> - <span class="o">}</span> - <span class="o">...</span> <span class="c1">// reads/writes both hugeArray</span> - <span class="o">}</span> - <span class="o">};</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">,</span> <span class="mi">1000</span><span class="o">);</span> - </code></pre> - <p>One common use for Kernel.getPassId() is to avoid flipping buffers in the outer loop.</p> - - <p>It is common for kernels to process data from one buffer to another, and in the next invocation process the data back the other way. 
Now these kernels can use the passId (odd or even) to determine the direction of data transfer.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr1</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr2</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kt">int</span> <span class="nf">f</span><span class="o">(</span><span class="kt">int</span> <span class="n">v</span><span class="o">){</span> <span class="err">…</span> <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> - <span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> - <span 
class="o">}</span><span class="k">else</span><span class="o">{</span> - <span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> - - <span class="o">}</span> - <span class="o">}</span> - <span class="o">};</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">,</span> <span class="mi">1000</span><span class="o">);</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Explicit Buffer Handling +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Explicit Buffer Handling</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to minimize buffer transfers.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<p>Aparapi is designed to shield the Java developer from dealing with the underlying movement of data between the OpenCL host and device. Aparapi can analyze a kernel’s <code>run()</code> method and run-reachable methods to determine which primitive arrays to transfer to the GPU prior to execution, and which arrays to transfer back when the GPU execution is complete.</p> + +<p>Generally this strategy is both clean and performant. Aparapi will attempt to just do the right thing.</p> + +<p>However, occasionally the following code pattern is seen.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> +<span class="o">};</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>This is a common pattern which unfortunately exposes an issue with Aparapi’s normal buffer handling.</p> + +<p>Although Aparapi does analyze the byte code of the <code>Kernel.run()</code> method (and any method reachable from <code>Kernel.run()</code>) Aparapi has no visibility 
to the call site. In the above code there is no way for Aparapi to detect that hugeArray is not modified within the for loop body. Unfortunately, Aparapi must default to being ‘safe’ and copy the contents of hugeArray backwards and forwards to the GPU device.</p> + +<p>Here we add comments to indicate where the unnecessary buffer transfers take place.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> +<span class="o">};</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> + <span class="c1">// copy hugeArray to GPU</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> + <span class="c1">// copy hugeArray back from the GPU</span> +<span class="o">}</span> +</code></pre></div> +<p>In reality hugeArray only needs to be copied to the GPU once (prior to the loop) and copied back once when the loop has terminated.</p> + +<p>Here we use comments to indicate the ‘optimal’ transfers.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span 
class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> +<span class="o">};</span> +<span class="c1">// Ideally transfer hugeArray to GPU here</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">loop</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">loop</span> <span class="o"><</span><span class="n">MAXLOOP</span><span class="o">;</span> <span class="n">loop</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> +<span class="c1">// Ideally transfer hugeArray back from GPU here</span> +</code></pre></div> +<p>Consider another common pattern</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span 
class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>This is a common pattern in reduce stages of map-reduce type problems. Essentially the developer wants to keep executing a kernel until some condition is met. For example, this may be seen in bitonic sort implementations and various financial applications.</p> + +<p>From the code it can be seen that the kernel reads and writes the <code>hugeArray[]</code> array and uses the single item <code>done[]</code> array to indicate some form of convergence or completion.</p> + +<p>As we demonstrated above, by default Aparapi will transfer <code>done[]</code> and <code>hugeArray[]</code> to and from the GPU device each time <code>Kernel.execute(HUGE)</code> is executed.</p> + +<p>To demonstrate which buffers are being transferred, these copies are shown as comments in the following version of the code.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span 
class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">){</span> + <span class="c1">// Send done[] to GPU</span> + <span class="c1">// Send hugeArray[] to GPU</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> + <span class="c1">// Fetch done[] from GPU</span> + <span class="c1">// Fetch hugeArray[] from GPU</span> +<span class="o">}</span> +</code></pre></div> +<p>Further analysis of the code reveals that <code>hugeArray[]</code> is not accessed by the loop containing the kernel execution, so if the loop runs 1,000 times Aparapi is performing 999 unnecessary transfers to the device and 999 unnecessary transfers back. 
Only two transfers of <code>hugeArray[]</code> are needed; one to move the initial data to the GPU and one to move it back after the loop terminates.</p> + +<p>The <code>done[]</code> array is accessed during each iteration (although never written to within the loop), so it does need to be transferred back for each return from Kernel.execute(), however, it only needs to be sent once.</p> + +<p>Clearly it is better to avoid unnecessary transfers, especially of large buffers like <code>hugeArray[]</code>.</p> + +<p>Aparapi exposes a feature which allows the developer to control these situations and explicitly manage transfers.</p> + +<p>To use this feature first the developer needs to ‘turn on’ explicit mode, using the <code>kernel.setExplicit(true)</code> method. Then the developer can request buffer/array transfers using either <code>kernel.put()</code> or <code>kernel.get()</code>. <code>Kernel.put()</code> forces a transfer to the GPU device and Kernel.get() transfers data back.</p> + +<p>The following code illustrates the use of these new explicit buffer management APIs.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span 
class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> +<span class="o">}</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +</code></pre></div> +<p>Note that marking a kernel as explicit and failing to request the appropriate transfer is a programmer error.</p> + +<p>We deliberately made <code>Kernel.put(...)</code>, <code>Kernel.get(...)</code> and <code>Kernel.execute(range)</code> return an instance of the executing kernel to allow these calls to be chained. 
Some may find this fluent style API more expressive.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> <span class="c1">// chained puts</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">).</span><span 
class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> <span class="c1">// chained execute and get</span> +<span class="o">}</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +</code></pre></div> +<p>An alternate approach for loops containing a single <code>kernel.execute(range)</code> call. +One variant of code which would normally suggest the use of Explicit Buffer Management can be handled differently. For cases where <code>Kernel.execute(range)</code> is the sole statement inside a loop and where the iteration count is known prior to the first iteration we offer an alternate (hopefully more elegant) way of minimizing buffer transfers.</p> + +<p>So for cases like:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> +<span class="o">};</span> + +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o"><</span><span class="mi">1000</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> 
+</code></pre></div> +<p>The developer can request that Aparapi perform the outer loop rather than coding the loop. This is achieved explicitly by passing the iteration count as the second argument to <code>Kernel.execute(range, iterations)</code>.</p> + +<p>Now any form of code that looks like :-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">passId</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">passId</span> <span class="o"><</span> <span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Can be replaced with</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> + +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">loopCount</span><span class="o">);</span> +</code></pre></div> +<p>Not only does this make the code more compact and avoids the use of explicit buffer management APIs, it allows Aparapi visibility to the complete loop so that Aparapi can minimize the 
number of transfers. Aparapi will only transfer buffers to the GPU once and transfer them back once, resulting in improved performance.</p> + +<p>Sometimes kernel code using this loop-pattern needs to track the current iteration number as the code passes through the outer loop. Previously we would be forced to use explicit buffer management to allow the kernel to do this.</p> + +<p>The code for this would have looked something like</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span> <span class="o">=</span> <span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span> <span class="o">=</span> <span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">passId</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span> <span class="mi">0</span><span 
class="o">){</span> + <span class="c1">// perform some initialization!</span> + <span class="o">}</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +<span class="k">for</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]&lt;</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]++){</span> + + <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">passId</span><span class="o">).</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>In the current version of Aparapi we added <code>Kernel.getPassId()</code> to allow a Kernel to determine the current ‘pass’ through the outer loop without having to use explicit buffer management.</p> + +<p>So the previous code can now be written without any explicit buffer management APIs:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span 
class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">pass</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> + <span class="c1">// perform some initialization!</span> + <span class="o">}</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> + <span class="o">}</span> +<span class="o">};</span> + +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">,</span> <span class="mi">1000</span><span class="o">);</span> +</code></pre></div> +<p>One common use for Kernel.getPassId() is to avoid flipping buffers in the outer loop.</p> + +<p>It is common for kernels to process data from one buffer to another, and in the next invocation process the data back the other way. 
Now these kernels can use the passId (odd or even) to determine the direction of data transfer.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr1</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr2</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kt">int</span> <span class="nf">f</span><span class="o">(</span><span class="kt">int</span> <span class="n">v</span><span class="o">){</span> <span class="err">…</span> <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> + <span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> + 
<span class="o">}</span><span class="k">else</span><span class="o">{</span> + <span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> + + <span class="o">}</span> + <span class="o">}</span> +<span class="o">};</span> + +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">,</span> <span class="mi">1000</span><span class="o">);</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/hsa-enabled-lambda.html b/documentation/hsa-enabled-lambda.html index e61c5db79f6acbf5dc7671b142e8a7782e3aca97..3585237055c0d29aabf54ae076b4b1a637800c9c 100644 --- a/documentation/hsa-enabled-lambda.html +++ b/documentation/hsa-enabled-lambda.html @@ -1,150 +1,152 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | HSA Enabled Lambda - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>HSA Enabled Lambda</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Adding HSA Support to Aparapi lambda branch.</h4> - </div> - - </div> - - </div> - <div class='container'> - <ul> - <li><a href="/documentation/setting-up-hsa.html" rel="nofollow">How to setup a HSA enabled Linux Platform</a></li> - <li><a href="/documentation/using-hsa-simulator.html" rel="nofollow">How to setup a HSA simulator on a Linux Platform</a></li> - </ul> - - <p>Recently the HSA Foundation released their ‘Programmers Reference Manual’. 
This manual is for developers wishing to write code for upcoming HSA compatible devices, it describes the HSA Intermediate Language (HSAIL) along with its binary form (BRIG) and describes how code is expected to execute on a HSA enabled devices.</p> - - <p>In many ways we can think of HSAIL as we do Java bytecode. It is a common intermediate form that can be optimized at runtime to execute across a variety of future heterogeneous platforms. HSAIL will greatly simplify the development of software taking advantage of both sequential and parallel compute solutions.</p> - - <p>Now that the spec is out, we have started adding HSA support to the Aparapi lambda branch. We believe that HSA combined with the upcoming Java 8 feature lambda will be a natural way to express parallel algorithms which can be executed on the GPU via HSA.</p> - - <p>A HSA+Lambda enabled Aparapi will remove many of Aparapi’s constraints. HSA allows all of the CPU’s memory to be accessed directly from code running on the GPU. This means</p> - - <ul> - <li>We no longer need to move data from the host CPU to the GPU.</li> - <li>We are no longer limited to the memory addressable from the GPU</li> - <li>We can access multi-dim arrays efficiently</li> - <li>We can access Java objects directly from the GPU. 
- These are all substantial benefits.</li> - </ul> - - <p>In the existing code (early prototype) we provide access to HSA as a specific device type.</p> - - <p>So our ubiquitous ‘squares’ example will initially be written as:</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> - <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="o">.../</span> - <span class="n">Device</span><span class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="o">(</span><span class="n">i</span><span class="o">)->{</span> - <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">});</span> - </code></pre> - <p>You will obviously need a Java 8 compatible JDK (<a href="https://jdk8.java.net/download.html" rel="nofollow">https://jdk8.java.net/download.html</a>) in your path.</p> - - <p>We also recommend using IntelliJ which has preliminary support for Java 8 lambda features. You can download the community edition of IntelliJ from <a href="http://www.jetbrains.com/idea/" rel="nofollow">http://www.jetbrains.com/idea/</a></p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | HSA Enabled Lambda +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>HSA Enabled Lambda</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Adding HSA Support to Aparapi lambda branch.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<ul> +<li><a href="/documentation/setting-up-hsa.html" rel="nofollow">How to set up a HSA enabled Linux Platform</a></li> +<li><a href="/documentation/using-hsa-simulator.html" rel="nofollow">How to set up a HSA simulator on a Linux Platform</a></li> +</ul> + +<p>Recently the HSA Foundation released their ‘Programmers Reference Manual’. This manual is for developers wishing to write code for upcoming HSA compatible devices; it describes the HSA Intermediate Language (HSAIL) along with its binary form (BRIG) and describes how code is expected to execute on HSA enabled devices.</p> + +<p>In many ways we can think of HSAIL as we do Java bytecode. It is a common intermediate form that can be optimized at runtime to execute across a variety of future heterogeneous platforms. HSAIL will greatly simplify the development of software taking advantage of both sequential and parallel compute solutions.</p> + +<p>Now that the spec is out, we have started adding HSA support to the Aparapi lambda branch. We believe that HSA combined with the upcoming Java 8 lambda feature will be a natural way to express parallel algorithms which can be executed on the GPU via HSA.</p> + +<p>A HSA+Lambda enabled Aparapi will remove many of Aparapi’s constraints. HSA allows all of the CPU’s memory to be accessed directly from code running on the GPU. This means:</p> + +<ul> +<li>We no longer need to move data from the host CPU to the GPU.</li> +<li>We are no longer limited to the memory addressable from the GPU.</li> +<li>We can access multi-dim arrays efficiently.</li> +<li>We can access Java objects directly from the GPU.
+These are all substantial benefits.</li> +</ul> + +<p>In the existing code (early prototype) we provide access to HSA as a specific device type.</p> + +<p>So our ubiquitous ‘squares’ example will initially be written as:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> +<span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="o">.../</span> +<span class="n">Device</span><span class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="o">(</span><span class="n">i</span><span class="o">)->{</span> + <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + <span class="o">});</span> +</code></pre></div> +<p>You will obviously need a Java 8 compatible JDK (<a href="https://jdk8.java.net/download.html" rel="nofollow">https://jdk8.java.net/download.html</a>) in your path.</p> + +<p>We also recommend using IntelliJ which has preliminary support for Java 8 lambda features. You can download the community edition of IntelliJ from <a href="http://www.jetbrains.com/idea/" rel="nofollow">http://www.jetbrains.com/idea/</a></p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/kernel-guidelines.html b/documentation/kernel-guidelines.html index 3bda619563e1aef789b2178c747ee2f4761ffe78..5ed2c439cb537174f701434260c575c1c6264b2e 100644 --- a/documentation/kernel-guidelines.html +++ b/documentation/kernel-guidelines.html @@ -1,206 +1,208 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Kernel Guidelines - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> 
- <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Kernel Guidelines</h1> - <div class='row center'> - <h4 class='header col s12 light center'>What code can and can't be converted to OpenCL by Aparapi.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Aparapi Java Kernel Guidelines</h2> - - <p>Certain practices can improve the chances of your Java kernel being converted to OpenCL and executing on a GPU.</p> - - <p>The following guidelines/restrictions only apply to the Kernel.run() method and any method reachable from run() (called†run-reachable methods†in this documentation), clearly any methods executed via a normal Java execution path will not be subject to these restrictions.</p> - - <p>Some restrictions/guidelines may be removed or augmented in a future Aparapi releases.</p> - - <h2>Data Types</h2> - - <ul> - <li>Only the Java primitive data types boolean, byte, short, int, long, and float and one-dimensional arrays of these primitive data types are supported by Aparapi.</li> - <li>Aparapi support for the primitive data type double will depend on your graphics card, driver, and OpenCL version. Aparapi will query the device/platform to determine if double is supported (at runtime). If your platform does not support double, Aparapi will drop back to (Java Thread Pool) (JTP) mode.</li> - <li>The primitive data type char is not supported.</li> - </ul> - - <h2>Fields</h2> - - <ul> - <li>Elements of primitive array fields can be read from kernel code.</li> - <li>Elements of primitive array fields can be written to by kernel code.</li> - <li>Note that Java creates ‘hidden’ fields for captured final primitive arrays (from anonymous inner classes) and they can be accessed as if they were fields of the kernel.</li> - <li>Primitive scalar fields can only be read by the kernel code. 
Because kernel run-reachable methods execute in parallel in an indeterminate order, any reliance on the result of modifications to primitive scalar fields is discouraged even when executing in Java Thread Pool mode.</li> - <li>Static final fields can be read from kernel code.</li> - <li>Static non-final fields are not supported for either read or write. Try to make them final.</li> - </ul> - - <h2>Arrays</h2> - - <ul> - <li>Only one-dimensional arrays are supported.</li> - <li>Arrays cannot be aliased either by direct local assignment or by passed arguments to other methods.</li> - <li>Java 5’s extended 'for’ syntax for (int i: arrayOfInt){} is not supported, because it causes a shallow copy of the original array under the covers.</li> - </ul> - - <h2>Methods</h2> - - <ul> - <li>References to or through a Java Object other than your kernel instance will cause Aparapi to abandon attempting to create OpenCL (note the following exceptions).</li> - <li>There are a few very specific exceptions to the above rule to allow accesses through getters/setters of objects held in arrays of objects referenced from the kernel code.</li> - <li>Static methods are not supported by Aparapi.</li> - <li>Recursion is not supported, whether direct or indirect. Aparapi tries to detect this recursion statically, but the developer should not rely on Aparapi to do so.</li> - <li>Methods with varargs argument lists are not supported by Aparapi.</li> - <li>Overloaded methods (i.e. methods with the same name but different signatures) are not supported by Aparapi. OpenCL is C99 based so we are constrained by OpenCL’s lack of support for overloading.</li> - <li>The kernel base class contains wrappers around most of the functions offered by java.lang.Math. When run in a thread pool these wrappers delegate back to java.lang.Math when executing in OpenCL they translate to OpenCL equivalents.</li> - </ul> - - <h2>Other Restrictions</h2> - - <ul> - <li>Exceptions are not supported (no throw, catch. 
or finally).</li> - <li>New is not supported either for arrays or objects</li> - <li>Synchronized blocks and synchronized methods are not supported.</li> - <li>Only simple loops and conditionals are supported; switch, break, and continue are not supported.</li> - <li>A variable cannot have its first assignment be the side effect of an expression evaluation or a method call. For example, the following will not be translated to run on the GPU.</li> - </ul> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="nf">foo</span><span class="o">(</span><span class="kt">int</span> <span class="n">a</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// . . .</span> - <span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">int</span> <span class="n">z</span><span class="o">;</span> - <span class="n">foo</span><span class="o">(</span><span class="n">z</span> <span class="o">=</span> <span class="mi">3</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <ul> - <li>This should be regarded as an error which needs to be addressed, as a workaround, explicitly initialize variables (even to 0) when declared.</li> - </ul> - - <h2>Beware Of Side Effects</h2> - - <p>OpenCL is C99-based and as such the result of expressions depending on side effects of other expressions can differ from what one might expect from Java, please avoid using code that assumes Java’s tighter rules. Generally code should be as simple as possible. 
- For example, although Java explicitly defines</p> - <pre class="highlight java"><code> - <span class="n">arra</span><span class="o">[</span><span class="n">i</span><span class="o">++]</span> <span class="o">=</span> <span class="n">arrb</span><span class="o">[</span><span class="n">i</span><span class="o">++];</span> - </code></pre> - <p>to be equivalent to</p> - <pre class="highlight java"><code> - <span class="n">arra</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">arrb</span><span class="o">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="o">];</span> - <span class="n">i</span> <span class="o">+=</span> <span class="mi">2</span><span class="o">;</span> - </code></pre> - <p>The C99/OpenCL standard does not define this and so the result would be undefined.</p> - - <h2>Runtime Exceptions</h2> - - <ul> - <li>When run on the GPU, array accesses will not generate an ArrayIndexOutOfBoundsException. Instead the behavior will be unspecified.</li> - <li>When run on the GPU, ArithmeticExceptions will not be generated, for example with integer division by zero. Instead the behavior will be unspecified. - Attribution</li> - </ul> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Kernel Guidelines +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Kernel Guidelines</h1> +<div class='row center'> +<h4 class='header col s12 light center'>What code can and can't be converted to OpenCL by Aparapi.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<h2>Aparapi Java Kernel Guidelines</h2> + +<p>Certain practices can improve the chances of your Java kernel being converted to OpenCL and executing on a GPU.</p> + +<p>The following guidelines/restrictions only apply to the Kernel.run() method and any method reachable from run() (called “run-reachable methods” in this documentation); clearly, any methods executed via a normal Java execution path will not be subject to these restrictions.</p> + +<p>Some restrictions/guidelines may be removed or augmented in future Aparapi releases.</p> + +<h2>Data Types</h2> + +<ul> +<li>Only the Java primitive data types boolean, byte, short, int, long, and float and one-dimensional arrays of these primitive data types are supported by Aparapi.</li> +<li>Aparapi support for the primitive data type double will depend on your graphics card, driver, and OpenCL version. Aparapi will query the device/platform to determine if double is supported (at runtime). If your platform does not support double, Aparapi will drop back to Java Thread Pool (JTP) mode.</li> +<li>The primitive data type char is not supported.</li> +</ul> + +<h2>Fields</h2> + +<ul> +<li>Elements of primitive array fields can be read from kernel code.</li> +<li>Elements of primitive array fields can be written to by kernel code.</li> +<li>Note that Java creates ‘hidden’ fields for captured final primitive arrays (from anonymous inner classes) and they can be accessed as if they were fields of the kernel.</li> +<li>Primitive scalar fields can only be read by the kernel code. Because kernel run-reachable methods execute in parallel in an indeterminate order, any reliance on the result of modifications to primitive scalar fields is discouraged even when executing in Java Thread Pool mode.</li> +<li>Static final fields can be read from kernel code.</li> +<li>Static non-final fields are not supported for either read or write. 
Try to make them final.</li> +</ul> + +<h2>Arrays</h2> + +<ul> +<li>Only one-dimensional arrays are supported.</li> +<li>Arrays cannot be aliased either by direct local assignment or by passed arguments to other methods.</li> +<li>Java 5’s extended ‘for’ syntax for (int i: arrayOfInt){} is not supported, because it causes a shallow copy of the original array under the covers.</li> +</ul> + +<h2>Methods</h2> + +<ul> +<li>References to or through a Java Object other than your kernel instance will cause Aparapi to abandon attempting to create OpenCL (note the following exceptions).</li> +<li>There are a few very specific exceptions to the above rule to allow accesses through getters/setters of objects held in arrays of objects referenced from the kernel code.</li> +<li>Static methods are not supported by Aparapi.</li> +<li>Recursion is not supported, whether direct or indirect. Aparapi tries to detect this recursion statically, but the developer should not rely on Aparapi to do so.</li> +<li>Methods with varargs argument lists are not supported by Aparapi.</li> +<li>Overloaded methods (i.e. methods with the same name but different signatures) are not supported by Aparapi. OpenCL is C99 based so we are constrained by OpenCL’s lack of support for overloading.</li> +<li>The kernel base class contains wrappers around most of the functions offered by java.lang.Math. When run in a thread pool these wrappers delegate back to java.lang.Math; when executing in OpenCL they translate to OpenCL equivalents.</li> +</ul> + +<h2>Other Restrictions</h2> + +<ul> +<li>Exceptions are not supported (no throw, catch, or finally).</li> +<li>New is not supported either for arrays or objects.</li> +<li>Synchronized blocks and synchronized methods are not supported.</li> +<li>Only simple loops and conditionals are supported; switch, break, and continue are not supported.</li> +<li>A variable cannot have its first assignment be the side effect of an expression evaluation or a method call. 
For example, the following will not be translated to run on the GPU.</li> +</ul> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="nf">foo</span><span class="o">(</span><span class="kt">int</span> <span class="n">a</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// . . .</span> +<span class="o">}</span> +<span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">int</span> <span class="n">z</span><span class="o">;</span> + <span class="n">foo</span><span class="o">(</span><span class="n">z</span> <span class="o">=</span> <span class="mi">3</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<ul> +<li>This should be regarded as an error which needs to be addressed, as a workaround, explicitly initialize variables (even to 0) when declared.</li> +</ul> + +<h2>Beware Of Side Effects</h2> + +<p>OpenCL is C99-based and as such the result of expressions depending on side effects of other expressions can differ from what one might expect from Java, please avoid using code that assumes Java’s tighter rules. Generally code should be as simple as possible. 
+For example, although Java explicitly defines</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">arra</span><span class="o">[</span><span class="n">i</span><span class="o">++]</span> <span class="o">=</span> <span class="n">arrb</span><span class="o">[</span><span class="n">i</span><span class="o">++];</span> +</code></pre></div> +<p>to be equivalent to</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">arra</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">arrb</span><span class="o">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="o">];</span> +<span class="n">i</span> <span class="o">+=</span> <span class="mi">2</span><span class="o">;</span> +</code></pre></div> +<p>The C99/OpenCL standard does not define this and so the result would be undefined.</p> + +<h2>Runtime Exceptions</h2> + +<ul> +<li>When run on the GPU, array accesses will not generate an ArrayIndexOutOfBoundsException. Instead the behavior will be unspecified.</li> +<li>When run on the GPU, ArithmeticExceptions will not be generated, for example with integer division by zero. Instead the behavior will be unspecified. +Attribution</li> +</ul> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/library-agent-duality.html b/documentation/library-agent-duality.html index 1188486b125c769fbe8fcb755b6dbd2b1507be8b..5ee114329e68ef87cd98ea488d188f69410b8d01 100644 --- a/documentation/library-agent-duality.html +++ b/documentation/library-agent-duality.html @@ -1,141 +1,143 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Library Agent Duality - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> 
- <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Library Agent Duality</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Aparapi libraries can be loaded as JVMTI agents.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>What are all these check-ins referring to JVMTI agents?</h2> - - <p>If you have been tracking Aparapi SVN checkins you will have noticed a bunch of changes to JNI code. I just finished arranging for aparapi libraries (.dll or .so) to be able to be loaded as JVMTI agent. Now (assuming library is in ${APARAPI_DIR}) we can either launch using the traditional…</p> - <pre class="highlight shell"><code> - java –Djava.library.path<span class="o">=</span><span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span> –classpath <span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi.jar;my.jar mypackage.MyClass - </code></pre> - <p>or …</p> - <pre class="highlight shell"><code> - java –agentpath<span class="o">=</span><span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi_x86_64.dll –classpath <span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi.jar;my.jar mypackage.MyClass - </code></pre> - <p>So the dll/so is now both ‘just a library’ and a JVMTI agent.</p> - - <h2>When would I need an agent?</h2> - - <p>Prevously Aparapi loaded classes that it needed to convert to OpenCL using java.lang.Class.getResourceAsStream(). This only works if we have a jar, or if the classes are on the filesystem somewhere. 
This approach will not work for ‘synthetically generated classes’.</p> - - <p>There are applications/frameworks which create synthetic classes (at runtime) which would not normally be useable by Aparapi.</p> - - <p>Specifically (and significantly) Java 8 uses synthetic classes to capture args (closure captures) so they can be passed to the final lambda implementation. We needed a way to allow Aparapi to access bytecode of any class, not just those in jars or on the disk.</p> - - <p>A JVMTI agent can register an interest in loaded classes (loaded by the classloader)do this. So when we use the aparapi library in 'agent mode’ it caches all bytes for all loaded classes (yes we could filter by name) and puts this information in a common data structure (should be a map but is a linked list at present).</p> - - <p>By adding a new OpenCLJNI.getBytes(String) JNI method, Aparapi can now retrieve the bytes for any loaded classes, out of this cache.</p> - - <p>So this combined with our ability to parse classes which don’t have line number information should really enable Aparapi to be used with Scala/JRuby/Groovy or other dynamic scripting languages which create classes on the fly.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Library Agent Duality +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Library Agent Duality</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Aparapi libraries can be loaded as JVMTI agents.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<h2>What are all these check-ins referring to JVMTI agents?</h2> + +<p>If you have been tracking Aparapi SVN checkins you will have noticed a bunch of changes to JNI code. I just finished arranging for aparapi libraries (.dll or .so) to be able to be loaded as a JVMTI agent. Now (assuming library is in ${APARAPI_DIR}) we can either launch using the traditional…</p> +<div class="highlight"><pre class="highlight shell"><code> +java -Djava.library.path<span class="o">=</span><span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span> -classpath <span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi.jar<span class="p">;</span>my.jar mypackage.MyClass +</code></pre></div> +<p>or …</p> +<div class="highlight"><pre class="highlight shell"><code> +java -agentpath<span class="o">:</span><span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi_x86_64.dll -classpath <span class="k">${</span><span class="nv">APARAPI_DIR</span><span class="k">}</span>/aparapi.jar<span class="p">;</span>my.jar mypackage.MyClass +</code></pre></div> +<p>So the dll/so is now both ‘just a library’ and a JVMTI agent.</p> + +<h2>When would I need an agent?</h2> + +<p>Previously Aparapi loaded classes that it needed to convert to OpenCL using java.lang.Class.getResourceAsStream(). This only works if we have a jar, or if the classes are on the filesystem somewhere. This approach will not work for ‘synthetically generated classes’.</p> + +<p>There are applications/frameworks which create synthetic classes (at runtime) which would not normally be useable by Aparapi.</p> + +<p>Specifically (and significantly) Java 8 uses synthetic classes to capture args (closure captures) so they can be passed to the final lambda implementation. 
We needed a way to allow Aparapi to access bytecode of any class, not just those in jars or on the disk.</p> + +<p>A JVMTI agent can register an interest in loaded classes (loaded by the classloader) to do this. So when we use the aparapi library in ‘agent mode’ it caches all bytes for all loaded classes (yes we could filter by name) and puts this information in a common data structure (should be a map but is a linked list at present).</p> + +<p>By adding a new OpenCLJNI.getBytes(String) JNI method, Aparapi can now retrieve the bytes for any loaded classes, out of this cache.</p> + +<p>So this combined with our ability to parse classes which don’t have line number information should really enable Aparapi to be used with Scala/JRuby/Groovy or other dynamic scripting languages which create classes on the fly.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/local-memory.html b/documentation/local-memory.html index a41bc212bf475de644c68f980a088c4d0b20cb19..8cc880fb486428bfff4abae5a1e752fe65cc028c 100644 --- a/documentation/local-memory.html +++ b/documentation/local-memory.html @@ -1,308 +1,310 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Local Memory - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' 
href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Local Memory</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to make use of local memory in a Kernel.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>How to make use of new local memory feature</h2> - - <p>By default all primitive arrays accessed by an Aparapi Kernel is considered global. If we look at the generated code using -Dcom.aparapi.enableShowGeneratedOpenCL=true we will see that primitive arrays (such as int buf[]) are mapped to _<em>global pointers (such as _</em>global int *buf) in OpenCL.</p> - - <p>Although this makes Aparapi easy to use (especially to Java developers who are unfamiliar to tiered memory hierarchies), it does limit the ability of the ‘power developer’ wanting to extract more performance from Aparapi on the GPU.</p> - - <p>This <a href="http://www.amd.com/us/products/technologies/stream-technology/opencl/pages/opencl-intro.aspx?cmpid=cp_article_2_2010" rel="nofollow">page</a> from AMD’s website shows the different types of memory that OpenCL programmers can exploit.</p> - - <p>Global memory buffers in Aparapi (primitive Java arrays) are stored in host memory and are copied to Global memory (the RAM of the GPU card).</p> - - <p>Local memory is 'closer’ to the compute devices and not copied from the host memory, it is just allocated for use on the device. The use of local memory on OpenCL can lead to much more performant code as the cost of fetching from local memory is much lower.</p> - - <p>Local memory is shared by all work item’s (kernel instances) executing in the same group. 
This is why the use of local memory was deferred until we had a satisfactory mechanism for specifying a required group size.</p> - - <p>Aparapi only supports local arrays, not scalers.</p> - - <h2>How to define a primitive array as “local”</h2> - - <p>We have two ways define a local buffer. Either we can decorate the variable name with a _$local$ suffix (yes it is a valid identifier n Java).</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer_$local</span><span class="err">$</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is a local buffer 1024 int's shared across all work item's in a group</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access buffer_$local$</span> - <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to buffer_$local$ to be synchronized across all work items in this group</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Alternatively (if defining inside the derived Kernel class - cannot be used via anonymous inner class pattern above!) 
we can can use the @Local annotation.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is a local buffer 1024 int's shared across all work item's in a group</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access localBuffer</span> - <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <h2>How do I know how big to make my local buffer?</h2> - - <p>This is where the new Range class helps.</p> - - <p>If we create a Range using:</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">rangeWithUndefinedGroupSize</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> - </code></pre> - <p>The Aparapi will pick a suitable group size. 
Generally this will be the highest factor of global size <= 256. So for a global size which is a power of two (and greater or equal to256 ;) ) the group size will be 256.</p> - - <p>Normally the size a local buffer will be some ratio of the group size.</p> - - <p>So if we needed 4 ints per group we might use a sequence such as.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">8192</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - <span class="kd">final</span> <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> <span class="c1">// let the runtime pick the group size</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)*</span><span class="mi">4</span><span class="o">];</span> <span class="c1">// this is a local buffer containing 4 ints per work item in the group</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access 
localBuffer</span> - <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Alternatively you can of course specify your own group size when you create the Range.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">8192</span><span class="o">];</span> <span class="c1">// this is global accessable to all work items.</span> - <span class="kd">final</span> <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span class="na">length</span><span class="o">,</span><span class="mi">16</span><span class="o">);</span> <span class="c1">// we requested a group size of 16</span> - - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)*</span><span class="mi">4</span><span class="o">];</span> <span class="c1">// this is a local buffer containing 4 ints per work item in the group = 64 ints</span> - <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">run</span><span class="o">(){</span> - <span class="c1">// access buffer</span> - <span class="c1">// access localBuffer</span> - <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> - <span class="c1">// ....</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <h2>Using barriers</h2> - - <p>As we mentioned above local memory buffers are shared by all work items/kernels executing in the same group. However, to read a value written by another workitem we need to insert a local barrier.</p> - - <p>A common pattern involves having each work item copying a value from global memory in local memory.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)];</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - - <span class="n">localBuffer</span><span class="o">[</span><span class="n">getLocalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)]</span> <span class="o">=</span> <span class="n">globalBuffer</span><span class="o">[</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)];</span> - <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// after this all kernels can see the data copied by other workitems in this 
group</span> - <span class="c1">// use localBuffer[0..getLocalSize(0)]</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Without the barrier above, there is no guarantee that other work items will see mutations to localBuffer from other work items.</p> - - <p>Caution regarding barriers - Barriers can be dangerous. It is up to the developer to ensure that all kernels execute the same # of calls to localBarrier(). Be very careful with conditional code (or code containing loops!), to ensure that each kernel executes the same number of calls to localBarrier().</p> - - <p>The following kernel will deadlock!</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> - <span class="c1">// ...</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>We need to make sure that all kernel’s in a group execute the localBarrier(). 
So the following will work.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> - <span class="c1">// ...</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="o">}</span><span class="k">else</span><span class="o">{</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="o">}</span> - - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Of course if we have multiple calls to localBarrier() in the 'if’ side of the if..then then we must match in the 'else’.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> - <span class="c1">// ...</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="o">}</span><span class="k">else</span><span class="o">{</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span 
class="n">localBarrier</span><span class="o">();</span> - <span class="o">}</span> - - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>With loops we must make sure that each kernel processes any loop the sam e # of times.</p> - - <p>So the following is fine.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span> <span class="mi">10</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="c1">// ...</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>However the following will deadlock</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span> <span class="nf">getLocalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> <span class="n">i</span><span class="o">++){</span> - <span class="c1">// ...</span> - <span 
class="n">localBarrier</span><span class="o">();</span> - <span class="c1">// ...</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>As a testament to how well we emulate OpenCL in JTP mode, this will also deadlock your kernel in JTP mode ;) so be careful.</p> - - <p>Performance impact in JTP mode - Of course Java itself does not support local memory in any form. So any time code using local memory falls back to JTP mode we must expect a considerable performance degradation (try the NBody local example in JTP mode).</p> - - <p>We do honor localBarrier() using Java’s barrier from the new concurrency utils. However, Java’s memory model does not require the use of a barrier to observe array changes across threads. So these barriers are basically just an expense.</p> - - <p>I would recommend using local memory and barriers only if I am 90% sure the code will run on the GPU.</p> - - <h2>Can I see some code?</h2> - - <p>There is a version of NBody example which uses local memory, the source can be <a href="https://github.com/Syncleus/aparapi-examples/blob/master/src/main/java/com/aparapi/examples/nbody/Local.java" rel="nofollow">found here</a>.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Local Memory +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Local Memory</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to make use of local memory in a Kernel.</h4> +</div> + +</div> + +</div> +<div class='container'> +<h2>How 
to make use of the new local memory feature</h2> + +<p>By default all primitive arrays accessed by an Aparapi Kernel are considered global. If we look at the generated code using -Dcom.aparapi.enableShowGeneratedOpenCL=true we will see that primitive arrays (such as int buf[]) are mapped to __global pointers (such as __global int *buf) in OpenCL.</p> + +<p>Although this makes Aparapi easy to use (especially for Java developers who are unfamiliar with tiered memory hierarchies), it does limit the ability of the ‘power developer’ wanting to extract more performance from Aparapi on the GPU.</p> + +<p>This <a href="http://www.amd.com/us/products/technologies/stream-technology/opencl/pages/opencl-intro.aspx?cmpid=cp_article_2_2010" rel="nofollow">page</a> from AMD’s website shows the different types of memory that OpenCL programmers can exploit.</p> + +<p>Global memory buffers in Aparapi (primitive Java arrays) are stored in host memory and are copied to Global memory (the RAM of the GPU card).</p> + +<p>Local memory is ‘closer’ to the compute devices and is not copied from the host memory; it is just allocated for use on the device. The use of local memory on OpenCL can lead to much more performant code as the cost of fetching from local memory is much lower.</p> + +<p>Local memory is shared by all work items (kernel instances) executing in the same group. This is why the use of local memory was deferred until we had a satisfactory mechanism for specifying a required group size.</p> + +<p>Aparapi only supports local arrays, not scalars.</p> + +<h2>How to define a primitive array as “local”</h2> + +<p>We have two ways to define a local buffer. 
Either we can decorate the variable name with a _$local$ suffix (yes, it is a valid identifier in Java).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer_$local$</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is a local buffer of 1024 ints shared across all work items in a group</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access buffer</span> + <span class="c1">// access buffer_$local$</span> + <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to buffer_$local$ to be synchronized across all work items in this group</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Alternatively (if defining inside the derived Kernel class - cannot be used via anonymous inner class pattern above!) 
we can use the @Local annotation.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> <span class="c1">// this is a local buffer of 1024 ints shared across all work items in a group</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access buffer</span> + <span class="c1">// access localBuffer</span> + <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<h2>How do I know how big to make my local buffer?</h2> + +<p>This is where the new Range class helps.</p> + +<p>If we create a Range using:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">rangeWithUndefinedGroupSize</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> +</code></pre></div> +<p>Aparapi will 
pick a suitable group size. Generally this will be the highest factor of the global size that is &lt;= 256. So for a global size which is a power of two (and greater than or equal to 256 ;) ) the group size will be 256.</p> + +<p>Normally the size of a local buffer will be some ratio of the group size.</p> + +<p>So if we needed 4 ints per work item we might use a sequence such as:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">8192</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> +<span class="kd">final</span> <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> <span class="c1">// let the runtime pick the group size</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)*</span><span class="mi">4</span><span class="o">];</span> <span class="c1">// this is a local buffer containing 4 ints per work item in the group</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access 
buffer</span> + <span class="c1">// access localBuffer</span> + <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Alternatively you can of course specify your own group size when you create the Range.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">buffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">8192</span><span class="o">];</span> <span class="c1">// this is globally accessible to all work items.</span> +<span class="kd">final</span> <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">buffer</span><span class="o">.</span><span class="na">length</span><span class="o">,</span><span class="mi">16</span><span class="o">);</span> <span class="c1">// we requested a group size of 16</span> + +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)*</span><span class="mi">4</span><span class="o">];</span> <span class="c1">// this is a local buffer containing 4 ints per work item in the group = 64 ints</span> + <span
class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// access buffer</span> + <span class="c1">// access localBuffer</span> + <span class="n">localBarrier</span><span class="o">();</span> <span class="c1">// allows all writes to localBuffer to be synchronized across all work items in this group</span> + <span class="c1">// ....</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<h2>Using barriers</h2> + +<p>As we mentioned above, local memory buffers are shared by all work items/kernels executing in the same group. However, to read a value written by another work item we need to insert a local barrier.</p> + +<p>A common pattern involves having each work item copy a value from global memory into local memory.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Local</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">localBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">range</span><span class="o">.</span><span class="na">getLocalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)];</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + + <span class="n">localBuffer</span><span class="o">[</span><span class="n">getLocalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)]</span> <span class="o">=</span> <span class="n">globalBuffer</span><span class="o">[</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)];</span> + <span class="n">localBarrier</span><span class="o">();</span> <span
class="c1">// after this all kernels can see the data copied by other work items in this group</span> + <span class="c1">// use localBuffer[0..getLocalSize(0)]</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Without the barrier above, there is no guarantee that other work items will see mutations to localBuffer from other work items.</p> + +<p>Caution regarding barriers +Barriers can be dangerous. It is up to the developer to ensure that all kernels execute the same # of calls to localBarrier(). Be very careful with conditional code (or code containing loops!), to ensure that each kernel executes the same number of calls to localBarrier().</p> + +<p>The following kernel will deadlock!</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="o">}</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>We need to make sure that all kernels in a group execute the localBarrier().
So the following will work.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="o">}</span><span class="k">else</span><span class="o">{</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="o">}</span> + + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Of course if we have multiple calls to localBarrier() in the ‘if’ side of the if..then then we must match in the ‘else’.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">if</span> <span class="o">(</span><span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">)></span><span class="mi">10</span><span class="o">){</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="o">}</span><span class="k">else</span><span class="o">{</span> + <span class="n">localBarrier</span><span 
class="o">();</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="o">}</span> + + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>With loops we must make sure that each kernel processes any loop the same # of times.</p> + +<p>So the following is fine.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span> <span class="mi">10</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="o">}</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>However, the following will deadlock.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span> <span class="nf">getLocalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> <span class="n">i</span><span
class="o">++){</span> + <span class="c1">// ...</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="c1">// ...</span> + <span class="o">}</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>As a testament to how well we emulate OpenCL in JTP mode, this will also deadlock your kernel in JTP mode ;) so be careful.</p> + +<p>Performance impact in JTP mode +Of course Java itself does not support local memory in any form. So any time code using local memory falls back to JTP mode we must expect a considerable performance degradation (try the NBody local example in JTP mode).</p> + +<p>We do honor localBarrier() using Java’s barrier from the new concurrency utils. However, Java’s memory model does not require the use of a barrier to observe array changes across threads. So these barriers are basically just an expense.</p> + +<p>I would recommend using local memory and barriers only if I am 90% sure the code will run on the GPU.</p> + +<h2>Can I see some code?</h2> + +<p>There is a version of NBody example which uses local memory, the source can be <a href="https://github.com/Syncleus/aparapi-examples/blob/master/src/main/java/com/aparapi/examples/nbody/Local.java" rel="nofollow">found here</a>.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/multiple-dim-ranges.html b/documentation/multiple-dim-ranges.html index 57034041a4adee979d2b3f4f3b45379bf3c8bcb2..ff94213a8a8547f5674426512081b7342dc00035 100644 --- a/documentation/multiple-dim-ranges.html +++ b/documentation/multiple-dim-ranges.html @@ -1,174 +1,176 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Multiple Dim Ranges - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li 
class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Multiple Dim Ranges</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to use the Range class (for multi-dim range access).</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>Aparapi now allows developers to execute over one, two or three dimensional ranges. OpenCL natively allows the user to execute over 1, 2 or 3 dimension grids via the clEnqueueNDRangeKernel() method.</p> - - <p>Initially we chose not to expose 2D or 3D ranges (Aparapi’s Kernel.execute(range) allowed only !d ranges, but following a specific request we added the notion of a Range via the new com.aparapi.Range class.</p> - - <p>A range is created using various static factory methods. For example to create a simple range {0..1024} we would use.</p> - - <p>Range range = Range.create(1024); - In this case the range will span 1..1024 and a ‘default’ group size will be decided behind the scenes (256 probably in this case).</p> - - <p>If the user wishes to select a specific group size (say 32) for a one dimensional Range (0..1024) then they can use.</p> - - <p>Range range = Range.create(1024, 32); - The group size must always be a 'factor’ of the global range. So globalRange % groupSize == 0</p> - - <p>For a 2D range we use the Range.create2D(…) factory methods.</p> - - <p>Range range = Range.create2D(32, 32); - The above represents a 2D grid of execution 32 rows by 32 columns. 
In this case a default group size will be determined by the runtime.</p> - - <p>If we wish to specify the groupsize (say 4x4) then we can use.</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create2D</span><span class="o">(</span><span class="mi">32</span><span class="o">,</span> <span class="mi">32</span><span class="o">,</span> <span class="mi">4</span><span class="o">,</span> <span class="mi">4</span><span class="o">);</span> - <span class="n">This</span> <span class="n">example</span> <span class="n">uses</span> <span class="n">a</span> <span class="mi">2</span><span class="n">D</span> <span class="n">range</span> <span class="n">to</span> <span class="n">apply</span> <span class="n">a</span> <span class="n">blurring</span> <span class="n">convolution</span> <span class="n">effect</span> <span class="n">to</span> <span class="n">a</span> <span class="n">pixel</span> <span class="n">buffer</span><span class="o">.</span> - - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span 
class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> - <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">1</span><span class="o">);</span> - <span class="k">if</span> <span class="o">(</span><span class="n">x</span><span class="o">></span><span class="mi">0</span> <span class="o">&&</span> <span class="n">x</span><span class="o"><(</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)-</span><span class="mi">1</span><span class="o">)</span> <span class="o">&&</span> <span class="n">y</span><span class="o">></span><span class="mi">0</span> <span class="o">&&</span> <span class="n">y</span><span class="o"><(</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)-</span><span class="mi">1</span><span class="o">)){</span> - <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> - <span class="k">for</span> <span 
class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> - <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> - <span class="o">}</span> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> +<title> +Aparapi | Multiple Dim Ranges +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Multiple Dim Ranges</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to use the Range class (for multi-dim range access).</h4> +</div> + +</div> + +</div> +<div class='container'> +<p>Aparapi now allows developers to execute over one, two or three dimensional ranges. 
OpenCL natively allows the user to execute over 1, 2 or 3 dimension grids via the clEnqueueNDRangeKernel() method.</p> + +<p>Initially we chose not to expose 2D or 3D ranges (Aparapi’s Kernel.execute(range) allowed only 1D ranges), but following a specific request we added the notion of a Range via the new com.aparapi.Range class.</p> + +<p>A range is created using various static factory methods. For example to create a simple range {0..1024} we would use.</p> + +<p>Range range = Range.create(1024); +In this case the range will span 1..1024 and a ‘default’ group size will be decided behind the scenes (256 probably in this case).</p> + +<p>If the user wishes to select a specific group size (say 32) for a one dimensional Range (0..1024) then they can use.</p> + +<p>Range range = Range.create(1024, 32); +The group size must always be a ‘factor’ of the global range. So globalRange % groupSize == 0</p> + +<p>For a 2D range we use the Range.create2D(…) factory methods.</p> + +<p>Range range = Range.create2D(32, 32); +The above represents a 2D grid of execution 32 rows by 32 columns. 
In this case a default group size will be determined by the runtime.</p> + +<p>If we wish to specify the groupsize (say 4x4) then we can use.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create2D</span><span class="o">(</span><span class="mi">32</span><span class="o">,</span> <span class="mi">32</span><span class="o">,</span> <span class="mi">4</span><span class="o">,</span> <span class="mi">4</span><span class="o">);</span> +<span class="n">This</span> <span class="n">example</span> <span class="n">uses</span> <span class="n">a</span> <span class="mi">2</span><span class="n">D</span> <span class="n">range</span> <span class="n">to</span> <span class="n">apply</span> <span class="n">a</span> <span class="n">blurring</span> <span class="n">convolution</span> <span class="n">effect</span> <span class="n">to</span> <span class="n">a</span> <span class="n">pixel</span> <span class="n">buffer</span><span class="o">.</span> + +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span 
class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> + <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">1</span><span class="o">);</span> + <span class="k">if</span> <span class="o">(</span><span class="n">x</span><span class="o">></span><span class="mi">0</span> <span class="o">&&</span> <span class="n">x</span><span class="o"><(</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)-</span><span class="mi">1</span><span class="o">)</span> <span class="o">&&</span> <span class="n">y</span><span class="o">></span><span class="mi">0</span> <span class="o">&&</span> <span class="n">y</span><span class="o"><(</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">1</span><span class="o">)-</span><span class="mi">1</span><span class="o">)){</span> + <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> + <span 
class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> + <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> <span class="o">}</span> - - <span class="o">};</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create2D</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">,</span> <span class="n">HEIGHT</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <h2>Handling this from JTP mode</h2> - - <p>Mapping to OpenCL for this is all fairly straightforward.</p> - - <p>In Java JTP mode we have to emulate the execution over the 1D, 2D and 3D ranges using threads. Note that the number of threads we launch is essentially the size of the group. 
So be careful creating large groups.</p> - - <p>If we ask for a 3D range using :-</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create3D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="mi">8</span><span class="o">,</span> <span class="mi">8</span><span class="o">,</span> <span class="mi">8</span><span class="o">);</span> - </code></pre> - <p>We are asking for a group size of <code>8x8x8 == 512</code>. So we are asking for 512 threads!</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> + <span class="o">}</span> + <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalSize</span><span class="o">(</span><span class="mi">0</span><span class="o">)+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">}</span> + +<span class="o">};</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create2D</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">,</span> <span class="n">HEIGHT</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<h2>Handling this from JTP mode</h2> + +<p>Mapping to OpenCL for this is all fairly straightforward.</p> + +<p>In Java JTP mode we have to emulate the execution over the 1D, 2D and 3D ranges using threads. Note that the number of threads we launch is essentially the size of the group. 
So be careful creating large groups.</p> + +<p>If we ask for a 3D range using :-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create3D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="mi">8</span><span class="o">,</span> <span class="mi">8</span><span class="o">,</span> <span class="mi">8</span><span class="o">);</span> +</code></pre></div> +<p>We are asking for a group size of <code>8x8x8 == 512</code>. So we are asking for 512 threads!</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/new-features.html b/documentation/new-features.html index b8298bd6333658fc1ba8567c4d2c3aaf7d3d3350..3b0de367a71a2b2bb3efeb39417e3127c2be26c9 100644 --- a/documentation/new-features.html +++ b/documentation/new-features.html @@ -1,355 +1,357 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | New Features - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' 
href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>New Features</h1> - <div class='row center'> - <h4 class='header col s12 light center'>New Features recently added to Aparapi.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>New Features</h2> - - <p>Aparapi has two new, especially useful features:</p> - - <p>Explicit Buffer Management for minimizing buffer transfers - Kernel access to objects held in arrays</p> - - <h3>Minimizing Buffer Transfers</h3> - - <h4>Explicit Buffer Management</h4> - - <p>Aparapi is designed to shield the Java developer from dealing with the underlying movement of data between the OpenCL host and device. Aparapi can analyze a kernel’s run() method and run-reachable methods to determine which primitive arrays to transfer to the GPU prior to execution, and which arrays to transfer back when the GPU execution is complete.</p> - - <p>Generally this strategy is both clean and performant. 
Aparapi will attempt to just do the right thing.</p> - - <p>However, occasionally the following code pattern is seen.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> +<title> +Aparapi | New Features +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND 
Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>New Features</h1> +<div class='row center'> +<h4 class='header col s12 light center'>New Features recently added to Aparapi.</h4> +</div> + +</div> + +</div> +<div class='container'> +<h2>New Features</h2> + +<p>Aparapi has two new, especially useful features:</p> + +<p>Explicit Buffer Management for minimizing buffer transfers +Kernel access to objects held in arrays</p> + +<h3>Minimizing Buffer Transfers</h3> + +<h4>Explicit Buffer Management</h4> + +<p>Aparapi is designed to shield the Java developer from dealing with the underlying movement of data between the OpenCL host and device. 
Aparapi can analyze a kernel’s run() method and run-reachable methods to determine which primitive arrays to transfer to the GPU prior to execution, and which arrays to transfer back when the GPU execution is complete.</p> + +<p>Generally this strategy is both clean and performant. Aparapi will attempt to just do the right thing.</p> + +<p>However, occasionally the following code pattern is seen.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>This is a common pattern in reduce stages of map-reduce type problems. 
Essentially the developer wants to keep executing a kernel until some condition is met. For example, this may be seen in bitonic sort implementations and various financial applications.</p> + +<p>From the code it can be seen that the kernel reads and writes the hugeArray[] array and uses the single-item done[] array to indicate some form of convergence or completion.</p> + +<p>Unfortunately, by default Aparapi will transfer done[] and hugeArray[] to and from the GPU device each time Kernel.execute(HUGE) is executed.</p> + +<p>To demonstrate which buffers are being transferred, these copies are shown as comments in the following version of the code.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> + <span class="c1">// Send done[] to GPU</span> + 
<span class="c1">// Send hugeArray[] to GPU</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> + <span class="c1">// Fetch done[] from GPU</span> + <span class="c1">// Fetch hugeArray[] from GPU</span> +<span class="o">}</span> +</code></pre></div> +<p>Further analysis of the code reveals that hugeArray[] is not accessed by the loop containing the kernel execution, so Aparapi is performing 999 unnecessary transfers to the device and 999 unnecessary transfers back. Only two transfers of hugeArray[] are needed: one to move the initial data to the GPU and one to move it back after the loop terminates.</p> + +<p>The done[] array is accessed during each iteration (although never written to within the loop), so it does need to be transferred back for each return from Kernel.execute(); however, it only needs to be sent once.</p> + +<p>Clearly it is better to avoid unnecessary transfers, especially of large buffers like hugeArray[].</p> + +<p>A new Aparapi feature allows the developer to control these situations and explicitly manage transfers.</p> + +<p>To use this feature, first set the mode to explicit, using the kernel.setExplicit(true) method, and then request transfers using either kernel.put() or kernel.get(). 
Kernel.put() forces a transfer to the GPU device and Kernel.get() transfers data back.</p> + +<p>The following code illustrates the use of these new explicit buffer management APIs.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> + <span 
class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> +<span class="o">}</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +</code></pre></div> +<p>Note that marking a kernel as explicit and failing to request the appropriate transfer is a programmer error.</p> + +<p>We deliberately made Kernel.put(…), Kernel.get(…) and Kernel.execute(range) return an instance of the executing kernel to allow these calls to be chained. Some may find this fluent-style API more expressive.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">done</span><span class="o">[</span><span 
class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> <span class="c1">// chained puts</span> +<span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">).</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> <span class="c1">// chained execute and get</span> +<span class="o">}</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +</code></pre></div> +<h4>An alternate approach for loops containing a single kernel.execute(range) call.</h4> + +<p>One variant of code which would normally suggest the use of Explicit Buffer Management can be handled differently. 
For cases where Kernel.execute(range) is the sole statement inside a loop and where the iteration count is known prior to the first iteration we offer an alternate (hopefully more elegant) way of minimizing buffer transfers.</p> + +<p>So for cases like:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> +<span class="o">};</span> + +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o"><</span><span class="mi">1000</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>The developer can request that Aparapi perform the outer loop rather than coding the loop. 
This is achieved explicitly by passing the iteration count as the second argument to Kernel.execute(range, iterations).</p> + +<p>Now any form of code that looks like:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">passId</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o"><</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Can be replaced with</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> + +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">loopCount</span><span class="o">);</span> +</code></pre></div> +<p>Not only does this make the code more compact and avoid the use of explicit buffer management APIs, it also gives Aparapi visibility of the complete loop so that Aparapi can minimize the number of transfers. 
Aparapi will only transfer buffers to the GPU once and transfer them back once, resulting in improved performance.</p> + +<p>Sometimes kernel code using this loop pattern needs to track the current iteration number as the code passes through the outer loop. Previously we would have been forced to use explicit buffer management to allow the kernel to do this.</p> + +<p>The code for this would have looked something like</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> +<span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">passId</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">0</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> + <span 
class="c1">// perform some initialization!</span> + <span class="o">}</span> + <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> +<span class="k">for</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]<</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]++){</span> + + <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">passId</span><span class="o">).</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>In the current version of Aparapi we added Kernel.getPassId() to allow a Kernel to determine the current ‘pass’ through the outer loop without having to use explicit buffer management.</p> + +<p>So the previous code can now be written without any explicit buffer management APIs:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span 
class="kt">int</span><span class="o">[]</span> <span class="n">pass</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> + <span class="c1">// perform some initialization!</span> + <span class="o">}</span> + <span class="o">...</span> <span class="c1">// reads/writes both hugeArray</span> + <span class="o">}</span> +<span class="o">};</span> +</code></pre></div> +<p>kernel.execute(HUGE, 1000);</p> + +<p>One common use for Kernel.getPassId() is to avoid flipping buffers in the outer loop.</p> + +<p>It is common for kernels to process data from one buffer to another, and in the next invocation process the data back the other way. 
Now these kernels can use the passId (odd or even) to determine the direction of data transfer.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr1</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr2</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kt">int</span> <span class="nf">f</span><span class="o">(</span><span class="kt">int</span> <span class="n">v</span><span class="o">){</span> <span class="err">…</span> <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(</span><span class="n">pass</span><span class="o">%</span><span class="mi">2</span><span class="o">==</span><span class="mi">0</span><span class="o">){</span> + <span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> + <span 
class="o">}</span><span class="k">else</span><span class="o">{</span> + <span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> + + <span class="o">}</span> + <span class="o">}</span> +<span class="o">};</span> +</code></pre></div> +<p>kernel.execute(HUGE, 1000);</p> + +<h4>Allow kernels to access simple arrays of objects</h4> + +<p>Aparapi needs to create OpenCL from the bytecode that it sees. Generally OpenCL constrains us to using parallel primitive arrays (OpenCL allows structs, but Java and OpenCL do not have comparable memory layouts for these structures). Therefore, you will generally need to refactor your code from a classic object-oriented form to use primitive arrays.</p> + +<p>This incompatibility between data-parallel and object-oriented code patterns might discourage use of Aparapi, so Aparapi includes limited support for arrays of simple Objects. Future versions may well extend this functionality and address performance loss.</p> + +<p>Consider the NBody example.</p> + +<p>Typically, a Java developer writing NBody would probably not separate the x,y and z ordinates into parallel arrays of floats as was required in the previous (alpha) version of Aparapi. 
Instead, a Java developer would probably create a Body class to hold the state of each body and possibly a Universe class (container of Body instances) with the responsibility for positioning and possibly displaying the bodies.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> + <span class="kt">float</span> <span class="n">x</span><span class="o">,</span><span class="n">y</span><span class="o">,</span><span class="n">z</span><span class="o">;</span> + <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span><span class="k">return</span> <span class="n">x</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">x</span> <span class="o">=</span> <span class="n">_x</span><span class="o">;}</span> + <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span><span class="k">return</span> <span class="n">y</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">y</span> <span class="o">=</span> <span class="n">_y</span><span class="o">;}</span> + <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span><span class="k">return</span> <span class="n">z</span><span class="o">;}</span> + <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">z</span> <span class="o">=</span> <span class="n">_z</span><span class="o">;}</span> + + + <span class="c1">// other data related to Body unused by positioning calculations</span> +<span class="o">}</span> + +<span class="kd">class</span> 
<span class="nc">Universe</span><span class="o">{</span> + <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span><span class="o">;</span> + <span class="kd">public</span> <span class="nf">Universe</span><span class="o">(</span><span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">_bodies</span><span class="o">){</span> + <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> + <span class="o">}</span> + <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> + <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">outer:</span><span class="n">bodies</span><span class="o">){</span> + <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">inner:</span><span class="n">bodies</span><span class="o">){</span> + <span class="c1">// adjust outer position to reflect the effect of inner</span> + <span class="c1">// using inner and outer getters and setters for x, y and z</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="o">}</span> + <span class="kt">void</span> <span class="nf">display</span><span class="o">(){</span> + <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">body:</span><span class="n">bodies</span><span class="o">){</span> + <span class="c1">// draw body based on x, y and z using Body getters</span> <span class="o">}</span> - </code></pre> - <p>This is a common pattern in reduce stages of map-reduce type problems. Essentially the developer wants to keep executing a kernel until some condition is met. 
For example, this may be seen in bitonic sort implementations and various financial applications.</p> - - <p>From the code it can be seen that the kernel reads and writes hugeArray[] array and uses the single item done[] array to indicate some form of convergence or completion.</p> - - <p>Unfortunately, by default Aparapi will transfer done[] and hugeArray[] to and from the GPU device each time Kernel.execute(HUGE) is executed.</p> - - <p>To demonstrate which buffers are being transfered, these copies are shown as comments in the following version of the code.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="c1">// Send done[] to GPU</span> - <span class="c1">// Send hugeArray[] to GPU</span> - <span class="n">kernel</span><span 
class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="c1">// Fetch done[] from GPU</span> - <span class="c1">// Fetch hugeArray[] from GPU</span> - <span class="o">}</span> - </code></pre> - <p>Further analysis of the code reveals that hugeArray[] is not accessed by the loop containing the kernel execution, so Aparapi is performing 999 unnecessary transfers to the device and 999 unnecessary transfers back. Only two transfers of hugeArray[] are needed; one to move the initial data to the GPU and one to move it back after the loop terminates.</p> - - <p>The done[] array is accessed during each iteration (although never written to within the loop), so it does needs to be transferred back for each return from Kernel.execute(), however, it only needs to be sent once.</p> - - <p>Clearly it is better to avoid unnecessary transfers, especially of large buffers like hugeArray[].</p> - - <p>A new Aparapi feature allows the developer to control these situations and explicitly manage transfers.</p> - - <p>To use this feature first set the mode to explicit, using the kernel.setExplicit(true) method, and then requests transfers using either kernel.put() or kernel.get(). 
Kernel.put() forces a transfer to the GPU device and Kernel.get() transfers data back.</p> - - <p>The following code illustrates the use of these new explicit buffer management APIs.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span 
class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> - <span class="o">}</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - </code></pre> - <p>Note that marking a kernel as explicit and failing to request the appropriate transfer is a programmer error.</p> - - <p>We deliberately made Kernel.put(…), Kernel.get(…) and Kernel.execute(range) return an instance of the executing kernel to allow these calls be chained. Some may find this fluent style API more expressive.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">done</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray and writes to done[0] when complete</span> - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span 
class="mi">0</span><span class="o">;</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">done</span><span class="o">).</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> <span class="c1">// chained puts</span> - <span class="k">while</span> <span class="o">(</span><span class="n">done</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span><span class="mi">0</span><span class="o">)){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">).</span><span class="na">get</span><span class="o">(</span><span class="n">done</span><span class="o">);</span> <span class="c1">// chained execute and put</span> - <span class="o">}</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - </code></pre> - <h4>An alternate approach for loops containing a single kernel.execute(range) call.</h4> - - <p>One variant of code which would normally suggest the use of Explicit Buffer Management can be handled differently. 
For cases where Kernel.execute(range) is the sole statement inside a loop and where the iteration count is known prior to the first iteration we offer an alternate (hopefully more elegant) way of minimizing buffer transfers.</p> - - <p>So for cases like:-</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">};</span> - - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o"><</span><span class="mi">1000</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">HUGE</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>The developer can request that Aparapi perform the outer loop rather than coding the loop. 
This is achieved explicitly by passing the iteration count as the second argument to Kernel.execute(range, iterations).</p> - - <p>Now any form of code that looks like :-</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">passId</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o"><</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Can be replaced with</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">loopCount</span><span class="o">);</span> - </code></pre> - <p>Not only does this make the code more compact and avoids the use of explicit buffer management APIs, it allows Aparapi visibility to the complete loop so that Aparapi can minimize the number of transfers. 
Aparapi will only transfer buffers to the GPU once and transfer them back once, resulting in improved performance.</p> - - <p>Sometimes kernel code using this loop-pattern needs to track the current iteration number as the code passed through the outer loop. Previously we would be forced to use explicit buffer management to allow the kernel to do this.</p> - - <p>The code for this would have looked something like</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span><span class="o">=</span><span class="mi">1024</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">loopCount</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">passId</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="mi">0</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> - <span class="c1">// perform some 
initialization!</span> - <span class="o">}</span> - <span class="o">...</span> <span class="c1">// reads/writes hugeArray</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">setExplicit</span><span class="o">(</span><span class="kc">true</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">hugeArray</span><span class="o">);</span> - <span class="k">for</span> <span class="o">(</span><span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="mi">0</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]<</span><span class="n">loopCount</span><span class="o">;</span> <span class="n">passId</span><span class="o">[</span><span class="mi">0</span><span class="o">]++){</span> - - <span class="n">kernel</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">passId</span><span class="o">).</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>In the current version of Aparapi we added Kernel.getPassId() to allow a Kernel to determine the current ‘pass’ through the outer loop without having to use explicit buffer management.</p> - - <p>So the previous code can now be written without any explicit buffer management APIs:-</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">hugeArray</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span 
class="n">pass</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">pass</span> <span class="o">==</span> <span class="mi">0</span><span class="o">){</span> - <span class="c1">// perform some initialization!</span> - <span class="o">}</span> - <span class="o">...</span> <span class="c1">// reads/writes both hugeArray</span> - <span class="o">}</span> - <span class="o">};</span> - </code></pre> - <p>kernel.execute(HUGE, 1000);</p> - - <p>One common use for Kernel.getPassId() is to avoid flipping buffers in the outer loop.</p> - - <p>It is common for kernels to process data from one buffer to another, and in the next invocation process the data back the other way. 
Now these kernels can use the passId (odd or even) to determine the direction of data transfer.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr1</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">arr2</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">HUGE</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span><span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kt">int</span> <span class="nf">f</span><span class="o">(</span><span class="kt">int</span> <span class="n">v</span><span class="o">){</span> <span class="err">…</span> <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">pass</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">pass</span><span class="o">%</span><span class="mi">2</span><span class="o">==</span><span class="mi">0</span><span class="o">){</span> - <span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> - <span 
class="o">}</span><span class="k">else</span><span class="o">{</span> - <span class="n">arr2</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">f</span><span class="o">(</span><span class="n">arr1</span><span class="o">[</span><span class="n">id</span><span class="o">]);</span> - - <span class="o">}</span> - <span class="o">}</span> - <span class="o">};</span> - </code></pre> - <p>kernel.execute(HUGE, 1000);</p> - - <h4>Allow kernels to access simple arrays of objects</h4> - - <p>Aparapi needs to create OpenCL from the bytecode that it sees. Generally OpenCL constrains us to using parallel primitive arrays (OpenCL allows structs, but Java and OpenCL do not have comparable memory layouts for these structures). Therefore, you will generally need to refactor your code from a classic object-oriented form to use primitive arrays.</p> - - <p>This incompatibility between data-parallel and object-oriented code patterns might discourage use of Aparapi, so Aparapi includes limited support for arrays of simple Objects. Future versions may well extend this functionality and address performance loss.</p> - - <p>Consider the NBody example.</p> - - <p>Typically, a Java developer writing NBody would probably not separate the x,y and z ordinates into parallel arrays of floats as was required in the previous (alpha) version of Aparapi. 
Instead, a Java developer would probably create a Body class to hold the state of each body and possibly a Universe class (container of Body instances) with the responsible for positioning and possibly displaying the bodies.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Body</span><span class="o">{</span> - <span class="kt">float</span> <span class="n">x</span><span class="o">,</span><span class="n">y</span><span class="o">,</span><span class="n">z</span><span class="o">;</span> - <span class="kt">float</span> <span class="nf">getX</span><span class="o">(){</span><span class="k">return</span> <span class="n">x</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setX</span><span class="o">(</span><span class="kt">float</span> <span class="n">_x</span><span class="o">){</span> <span class="n">x</span> <span class="o">=</span> <span class="n">_x</span><span class="o">;}</span> - <span class="kt">float</span> <span class="nf">getY</span><span class="o">(){</span><span class="k">return</span> <span class="n">y</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setY</span><span class="o">(</span><span class="kt">float</span> <span class="n">_y</span><span class="o">){</span> <span class="n">y</span> <span class="o">=</span> <span class="n">_y</span><span class="o">;}</span> - <span class="kt">float</span> <span class="nf">getZ</span><span class="o">(){</span><span class="k">return</span> <span class="n">z</span><span class="o">;}</span> - <span class="kt">void</span> <span class="nf">setZ</span><span class="o">(</span><span class="kt">float</span> <span class="n">_z</span><span class="o">){</span> <span class="n">z</span> <span class="o">=</span> <span class="n">_z</span><span class="o">;}</span> - - - <span class="c1">// other data related to Body unused by positioning calculations</span> - <span class="o">}</span> - - <span class="kd">class</span> <span 
class="nc">Universe</span><span class="o">{</span> - <span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">bodies</span><span class="o">;</span> - <span class="kd">public</span> <span class="nf">Universe</span><span class="o">(</span><span class="kd">final</span> <span class="n">Body</span><span class="o">[]</span> <span class="n">_bodies</span><span class="o">){</span> - <span class="n">bodies</span> <span class="o">=</span> <span class="n">_bodies</span><span class="o">;</span> - <span class="o">}</span> - <span class="kt">void</span> <span class="nf">adjustPositions</span><span class="o">(){</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">outer:</span><span class="n">bodies</span><span class="o">){</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">inner:</span><span class="n">bodies</span><span class="o">}{</span> - <span class="c1">// adjust outer position to reflect the effect of inner</span> - <span class="c1">// using inner and outer getters and setters for x, y and z</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="kt">void</span> <span class="nf">display</span><span class="o">(){</span> - <span class="k">for</span> <span class="o">(</span><span class="n">Body</span> <span class="nl">body:</span><span class="n">bodies</span><span class="o">){</span> - <span class="c1">// draw body based on x, y and z using Body getters</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>From the above code we see that the <code>Universe.adjustPositions()</code> method is compute intensive and an ideal candidate for refactoring to use Aparapi. 
The current version of Aparapi is able to deal with simple arrays of objects like this.</p> - - <p>Now when Aparapi encounters an array of objects and the accesses to these objects are constrained to simple getters and setters, Aparapi will automatically extract the values of the accessed fields into a data parallel form, execute the kernel and then replace the results back in the original objects in the array. This happens on each call to Kernel.execute() and is fairly costly (from a performance point of view), however, for embarrassingly parallel code (such as NBody), we can still show considerable performance gains over standard Java Thread Pool</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>From the above code we see that the <code>Universe.adjustPositions()</code> method is compute intensive and an ideal candidate for refactoring to use Aparapi. The current version of Aparapi is able to deal with simple arrays of objects like this.</p> + +<p>Now when Aparapi encounters an array of objects and the accesses to these objects are constrained to simple getters and setters, Aparapi will automatically extract the values of the accessed fields into a data parallel form, execute the kernel and then replace the results back in the original objects in the array. This happens on each call to Kernel.execute() and is fairly costly (from a performance point of view), however, for embarrassingly parallel code (such as NBody), we can still show considerable performance gains over standard Java Thread Pool</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/opencl-bindings.html b/documentation/opencl-bindings.html index 7746fea0aa1fbcbb7b025d24bd73ca080872f7ca..6daa9e86b5a9c6220d1b6ee518099139029dbdcb 100644 --- a/documentation/opencl-bindings.html +++ b/documentation/opencl-bindings.html @@ -1,170 +1,171 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | OpenCL Bindings - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a 
class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>OpenCL Bindings</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to use new OpenCL binding mechanism.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>As a step towards the extension mechanism I needed a way to easily bind OpenCL to an interface.</p> - - <p>Here is what I have come up with. We will use the ‘Square’ example.</p> - - <p>You first define an interface with OpenCL annotations..</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o"><</span><span class="n">Squarer</span><span class="o">>{</span> - <span class="nd">@Kernel</span><span class="o">(</span><span class="s">"{\n"</span><span class="c1">//</span> - <span class="o">+</span> <span class="s">" const size_t id = get_global_id(0);\n"</span><span class="c1">//</span> - <span class="o">+</span> <span class="s">" out[id] = in[id]*in[id];\n"</span><span class="c1">//</span> - <span class="o">+</span> <span class="s">"}\n"</span><span class="o">)</span><span class="c1">//</span> - <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span class="o">(</span><span class="c1">//</span> - <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span 
class="o">[]</span> <span class="n">out</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>This describes the API we wish to bind to a set of kernel entrypoints (here we only have one, but we could have many). Then you 'realize’ the interface by asking a device to create an implementation of the interface. Device is a new Aparapi class which represents a GPU or CPU OpenCL device. So here we are asking for the first (default) GPU device to realize the interface.</p> - <pre class="highlight java"><code> - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">(</span><span class="n">Squarer</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> - </code></pre> - <p>Now you can call the implementation directly with a Range.</p> - <pre class="highlight java"><code> - <span class="n">squarer</span><span class="o">.</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">);</span> - </code></pre> - <p>I think that we will have the easiest OpenCL binding out there…</p> - - <p>Following some conversations/suggestions online http://a-hackers-craic.blogspot.com/2012/03/aparapi.html we could also offer the ability to provide the OpenCL source from a file/url course using interface level Annotations.</p> - - <p>So we could allow.</p> - <pre class="highlight java"><code> - <span class="nd">@OpenCL</span><span class="o">.</span><span class="na">Resource</span><span class="o">(</span><span class="s">"squarer.cl"</span><span class="o">);</span> - <span class="kd">interface</span> <span 
class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o"><</span><span class="n">Squarer</span><span class="o">>{</span> - <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span class="o">(</span><span class="c1">//</span> - <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Or if the text is on-hand at compile time in a single constant string</p> - <pre class="highlight java"><code> - <span class="nd">@OpenCL</span><span class="o">.</span><span class="na">Source</span><span class="o">(</span><span class="s">"... 
opencl text here"</span><span class="o">);</span> - <span class="kd">interface</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o"><</span><span class="n">Squarer</span><span class="o">>{</span> - <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span class="o">(</span><span class="c1">//</span> - <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> - <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Finally to allow for creation of dynamic OpenCL (good for FFT’s of various Radii).</p> - <pre class="highlight java"><code> - <span class="n">String</span> <span class="n">openclSource</span> <span class="o">=</span> <span class="o">...;</span> - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">(</span><span class="n">Squarer</span><span class="o">.</span><span class="na">class</span><span class="o">,</span> <span class="n">openclSource</span><span class="o">);</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | OpenCL Bindings +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>OpenCL Bindings</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to use new OpenCL binding mechanism.</h4> +</div> + +</div> + +</div> +<div class='container'> +<p>As a 
step towards the extension mechanism I needed a way to easily bind OpenCL to an interface.</p> + +<p>Here is what I have come up with. We will use the ‘Square’ example.</p> + +<p>You first define an interface with OpenCL annotations.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o">&lt;</span><span class="n">Squarer</span><span class="o">&gt;{</span> +<span class="nd">@Kernel</span><span class="o">(</span><span class="s">"{\n"</span><span class="c1">//</span> + <span class="o">+</span> <span class="s">" const size_t id = get_global_id(0);\n"</span><span class="c1">//</span> + <span class="o">+</span> <span class="s">" out[id] = in[id]*in[id];\n"</span><span class="c1">//</span> + <span class="o">+</span> <span class="s">"}\n"</span><span class="o">)</span><span class="c1">//</span> +<span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span class="o">(</span><span class="c1">//</span> + <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>This describes the API we wish to bind to a set of kernel entrypoints (here we only have one, but we could have many). Then you ‘realize’ the interface by asking a device to create an implementation of the interface. 
Device is a new Aparapi class which represents a GPU or CPU OpenCL device. So here we are asking for the first (default) GPU device to realize the interface.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">(</span><span class="n">Squarer</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> +</code></pre></div> +<p>Now you can call the implementation directly with a Range.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">squarer</span><span class="o">.</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">);</span> +</code></pre></div> +<p>I think that we will have the easiest OpenCL binding out there…</p> + +<p>Following some conversations/suggestions online (http://a-hackers-craic.blogspot.com/2012/03/aparapi.html), we could also offer the ability to provide the OpenCL source from a file or URL using interface-level annotations.</p> + +<p>So we could allow:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="nd">@OpenCL</span><span class="o">.</span><span class="na">Resource</span><span class="o">(</span><span class="s">"squarer.cl"</span><span class="o">);</span> +<span class="kd">interface</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o">&lt;</span><span class="n">Squarer</span><span class="o">&gt;{</span> + <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span 
class="o">(</span><span class="c1">//</span> + <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Or if the text is on-hand at compile time in a single constant string</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="nd">@OpenCL</span><span class="o">.</span><span class="na">Source</span><span class="o">(</span><span class="s">"... opencl text here"</span><span class="o">);</span> +<span class="kd">interface</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">OpenCL</span><span class="o"><</span><span class="n">Squarer</span><span class="o">>{</span> + <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">square</span><span class="o">(</span><span class="c1">//</span> + <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalReadOnly</span><span class="o">(</span><span class="s">"in"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span><span class="c1">//</span> + <span class="nd">@GlobalWriteOnly</span><span class="o">(</span><span class="s">"out"</span><span class="o">)</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Finally to 
allow for creation of dynamic OpenCL (good for FFTs of various radii).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">String</span> <span class="n">openclSource</span> <span class="o">=</span> <span class="o">...;</span> +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">(</span><span class="n">Squarer</span><span class="o">.</span><span class="na">class</span><span class="o">,</span> <span class="n">openclSource</span><span class="o">);</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full-time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/private-memory-space.html b/documentation/private-memory-space.html index 4248d30ffb9d9b8d6e6b5e0d5575550d310a7701..1b34c52aef8ec51d475c64aceb9664f380d1ca86 100644 --- a/documentation/private-memory-space.html +++ b/documentation/private-memory-space.html @@ -1,149 +1,151 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Private Memory Space - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Private Memory Space</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Using private memory space in Aparapi kernels.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Introduction</h2> - - <p>The private memory space identifier (just “private” is also recognised) can be applied to struct fields in order to indicate that the data is not shared with/accessible to other kernel instances. Whilst this is the default for non-array data, it must be explicitly applied to array fields in order to make them private. 
Aparapi now supports arrays in the private memory space.</p> - - <p>The private memory space is generally only suitable for smallish arrays, but is required for certain algorithms, e.g. for those which must mutate (for example, sort or partially sort) an exclusive copy of an array/subarray.</p> - - <h2>Details</h2> - - <p>In Aparapi there are two mechanisms available to mark a Kernel class member as belonging to the private memory space when mapped to OpenCL code (matching the equivalent functionality for marking items as belonging to the local memory space). Either the field can be named with a suffix plus buffer size, for example</p> - <pre class="highlight java"><code> - <span class="kd">protected</span> <span class="kt">short</span><span class="o">[]</span> <span class="n">myBuffer_$private</span><span class="err">$</span><span class="mi">32</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">short</span><span class="o">[</span><span class="mi">32</span><span class="o">];</span> - </code></pre> - <p>or using the Annotation Kernel.PrivateMemorySpace, for example</p> - <pre class="highlight java"><code> - <span class="kd">protected</span> <span class="nd">@PrivateMemorySpace</span><span class="o">(</span><span class="mi">32</span><span class="o">)</span> <span class="kt">short</span><span class="o">[]</span> <span class="n">myBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">short</span><span class="o">[</span><span class="mi">32</span><span class="o">];</span> - </code></pre> - <p>The latter should be used in preference to the former.</p> - - <p>Note that OpenCL requires that the size of a private array be fixed at compile time for any kernel. Thus it is not possible for a single Kernel subclass to support private buffers of varying size. 
Unfortunately this may entail creating multiple subclasses with varying buffer sizes in order to most efficiently support varying private buffer sizes.</p> - - <p>Of course, a single Kernel class can be created which has a private buffer large enough for all use cases, though this may be suboptimal if only a small fraction of the maximum buffer size is commonly required.</p> - - <p>Because private buffers are unshared, they require much more of a GPU’s memory than a local or global buffer of the same size, and should therefore be used sparingly and kept as small as possible, as overuse of large private arrays might cause GPU execution to fail on lower-end graphics cards.</p> - - <p>However, private memory space is the fastest of all OpenCls memory spaces, so may in some limited cases might be used to increase execution speed even when the kernel does not need to modify the array and a shared (local or global) array would suffice - for example to provide a smallish lookup-table to replace an expensive function call.</p> - - <p>Without modification, an Aparapi kernel which uses private buffers may fail to work when invoked in Java Threadpool (JTP) mode, because the buffer will be shared across multiple threads. However a simple mechanism exists which allows such buffers to be used safely in JTP execution mode.</p> - - <p>The Kernel.NoCL annotation exists to allow specialised code to be executed when running in Java (or JTP) which is not invoked when running on the GPU. A NoCL method can be inserted at the begining of a Kernel’s run() method which sets the private array to a value obtained from a static ThreadLocal<foo[]> where foo is the primitive type of the array in question. 
This will have no effect upon OpenCL execution, but will allow threadsafe execution when running in java.</p> - - <p>In the project samples, there is a package com.aparapi.sample.median which gives an example of a median image filter which uses a private array of pixel data to apply a distructive median algorithm to a “window” of local pixels. This sample also demonstrates how to use the ThreadLocal trick to allow correct behaviour when running in JTP execution mode.</p> - - <p><a href="http://code.google.com/p/aparapi/source/browse/trunk/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java" rel="nofollow">http://code.google.com/p/aparapi/source/browse/trunk/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java</a></p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Private Memory Space +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Private Memory Space</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Using private memory space in Aparapi kernels.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<h2>Introduction</h2> + +<p>The private memory space identifier (just “private” is also recognised) can be applied to struct fields in order to indicate that the data is not shared with/accessible to other kernel instances. Whilst this is the default for non-array data, it must be explicitly applied to array fields in order to make them private. Aparapi now supports arrays in the private memory space.</p> + +<p>The private memory space is generally only suitable for smallish arrays, but is required for certain algorithms, e.g. for those which must mutate (for example, sort or partially sort) an exclusive copy of an array/subarray.</p> + +<h2>Details</h2> + +<p>In Aparapi there are two mechanisms available to mark a Kernel class member as belonging to the private memory space when mapped to OpenCL code (matching the equivalent functionality for marking items as belonging to the local memory space). Either the field can be named with a suffix plus buffer size, for example</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">protected</span> <span class="kt">short</span><span class="o">[]</span> <span class="n">myBuffer_$private</span><span class="err">$</span><span class="mi">32</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">short</span><span class="o">[</span><span class="mi">32</span><span class="o">];</span> +</code></pre></div> +<p>or using the Annotation Kernel.PrivateMemorySpace, for example</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">protected</span> <span class="nd">@PrivateMemorySpace</span><span class="o">(</span><span class="mi">32</span><span class="o">)</span> <span class="kt">short</span><span class="o">[]</span> <span class="n">myBuffer</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">short</span><span class="o">[</span><span class="mi">32</span><span class="o">];</span> +</code></pre></div> +<p>The latter should be used in 
preference to the former.</p> + +<p>Note that OpenCL requires that the size of a private array be fixed at compile time for any kernel. Thus it is not possible for a single Kernel subclass to support private buffers of varying size. Unfortunately, this may entail creating multiple subclasses with varying buffer sizes in order to most efficiently support varying private buffer sizes.</p> + +<p>Of course, a single Kernel class can be created which has a private buffer large enough for all use cases, though this may be suboptimal if only a small fraction of the maximum buffer size is commonly required.</p> + +<p>Because private buffers are unshared, they require much more of a GPU’s memory than a local or global buffer of the same size, and should therefore be used sparingly and kept as small as possible, as overuse of large private arrays might cause GPU execution to fail on lower-end graphics cards.</p> + +<p>However, private memory space is the fastest of all OpenCL’s memory spaces, so in some limited cases it might be used to increase execution speed even when the kernel does not need to modify the array and a shared (local or global) array would suffice - for example to provide a smallish lookup-table to replace an expensive function call.</p> + +<p>Without modification, an Aparapi kernel which uses private buffers may fail to work when invoked in Java Threadpool (JTP) mode, because the buffer will be shared across multiple threads. However, a simple mechanism exists which allows such buffers to be used safely in JTP execution mode.</p> + +<p>The Kernel.NoCL annotation exists to allow specialised code to be executed when running in Java (or JTP) which is not invoked when running on the GPU. A NoCL method can be inserted at the beginning of a Kernel’s run() method which sets the private array to a value obtained from a static ThreadLocal&lt;foo[]&gt; where foo is the primitive type of the array in question. 
This will have no effect upon OpenCL execution, but will allow thread-safe execution when running in Java.</p> + +<p>In the project samples, there is a package com.aparapi.sample.median which gives an example of a median image filter which uses a private array of pixel data to apply a destructive median algorithm to a “window” of local pixels. This sample also demonstrates how to use the ThreadLocal trick to allow correct behaviour when running in JTP execution mode.</p> + +<p><a href="http://code.google.com/p/aparapi/source/browse/trunk/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java" rel="nofollow">http://code.google.com/p/aparapi/source/browse/trunk/samples/median/src/com/amd/aparapi/sample/median/MedianDemo.java</a></p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full-time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/profiling-the-kernel.html b/documentation/profiling-the-kernel.html index 57069c7d0b2a2ca2b62262e6969742bdbbef0002..0606d7c8a8523ded84baecfee134732edb1ee304 100644 --- a/documentation/profiling-the-kernel.html +++ b/documentation/profiling-the-kernel.html @@ -1,165 +1,167 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Profiling the Kernel - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - 
<li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Profiling the Kernel</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Using Aparapi's built in profiling APIs.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>If you want to extract OpenCL performance info from a kernel at runtime you need to set the property :-</p> - <pre class="highlight plaintext"><code> - -Dcom.aparapi.enableProfiling=true - </code></pre> - <p>Your application can then call kernel.getProfileInfo() after a successful call to kernel.execute(range) to extract a List List<ProfileInfo>.</p> - - <p>Each ProfileInfo holds timing information for buffer writes, executs and buffer reads.</p> - - <p>The following code will print a simple table of profile information</p> - <pre class="highlight java"><code> - <span class="n">List</span><span class="o"><</span><span class="n">ProfileInfo</span><span class="o">></span> <span class="n">profileInfo</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="na">getProfileInfo</span><span class="o">();</span> - <span class="k">for</span> <span class="o">(</span><span class="kd">final</span> <span class="n">ProfileInfo</span> <span class="n">p</span> <span class="o">:</span> <span class="n">profileInfo</span><span class="o">)</span> <span class="o">{</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">print</span><span class="o">(</span><span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getType</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getLabel</span><span 
class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" .. "</span> - <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">((</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">-</span> <span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">())</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">"us"</span><span class="o">);</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">();</span> - <span class="o">}</span> - </code></pre> - <p>Here is an example implementation</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">2048</span><span class="o">*</span><span class="mi">2048</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span 
class="kd">final</span> <span class="kt">int</span> <span class="n">gid</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> - <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">=</span><span class="mi">0</span><span class="n">f</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">k</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">List</span><span class="o"><</span><span class="n">ProfileInfo</span><span class="o">></span> <span class="n">profileInfo</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="na">getProfileInfo</span><span class="o">();</span> - - <span class="k">for</span> <span class="o">(</span><span class="kd">final</span> <span class="n">ProfileInfo</span> <span class="n">p</span> <span class="o">:</span> <span class="n">profileInfo</span><span class="o">)</span> <span class="o">{</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">print</span><span class="o">(</span><span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getType</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getLabel</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span 
class="s">" .. "</span> - <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">((</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">-</span> <span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">())</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">"us"</span><span class="o">);</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">();</span> - <span class="o">}</span> - <span class="n">k</span><span class="o">.</span><span class="na">dispose</span><span class="o">();</span> - </code></pre> - <p>And here is the tabular output from</p> - <pre class="highlight plaintext"><code> - java - -Djava.library.path=${APARAPI_HOME} - -Dcom.aparapi.enableProfiling=true - -cp ${APARAPI_HOME}:. - MyClass - - W val$result 69500 .. 72694 3194us - X exec() 72694 .. 72835 141us - R val$result 75327 .. 78225 2898us - </code></pre> - <p>The table shows that the transfer of the ‘result’ buffer to the device ('W’) took 3194 us (micro seconds), the execute ('X’) of the kernel 141 us and the read ('R’) of resulting buffer 2898 us.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Profiling the Kernel +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Profiling the Kernel</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Using Aparapi's built in profiling APIs.</h4> +</div> + +</div> + +</div> +<div class='container'> +<p>If 
you want to extract OpenCL performance info from a kernel at runtime you need to set the property :-</p> +<div class="highlight"><pre class="highlight plaintext"><code> +-Dcom.aparapi.enableProfiling=true +</code></pre></div> +<p>Your application can then call kernel.getProfileInfo() after a successful call to kernel.execute(range) to extract a List&lt;ProfileInfo&gt;.</p> + +<p>Each ProfileInfo holds timing information for buffer writes, executes and buffer reads.</p> + +<p>The following code will print a simple table of profile information</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">List</span><span class="o"><</span><span class="n">ProfileInfo</span><span class="o">></span> <span class="n">profileInfo</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="na">getProfileInfo</span><span class="o">();</span> +<span class="k">for</span> <span class="o">(</span><span class="kd">final</span> <span class="n">ProfileInfo</span> <span class="n">p</span> <span class="o">:</span> <span class="n">profileInfo</span><span class="o">)</span> <span class="o">{</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">print</span><span class="o">(</span><span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getType</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getLabel</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" .. 
"</span> + <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">((</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">-</span> <span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">())</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">"us"</span><span class="o">);</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">();</span> +<span class="o">}</span> +</code></pre></div> +<p>Here is an example implementation</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">2048</span><span class="o">*</span><span class="mi">2048</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kd">final</span> <span class="kt">int</span> <span class="n">gid</span><span class="o">=</span><span class="n">getGlobalId</span><span class="o">();</span> + <span class="n">result</span><span class="o">[</span><span class="n">gid</span><span class="o">]</span> <span class="o">=</span><span class="mi">0</span><span 
class="n">f</span><span class="o">;</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">k</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="n">List</span><span class="o"><</span><span class="n">ProfileInfo</span><span class="o">></span> <span class="n">profileInfo</span> <span class="o">=</span> <span class="n">k</span><span class="o">.</span><span class="na">getProfileInfo</span><span class="o">();</span> + +<span class="k">for</span> <span class="o">(</span><span class="kd">final</span> <span class="n">ProfileInfo</span> <span class="n">p</span> <span class="o">:</span> <span class="n">profileInfo</span><span class="o">)</span> <span class="o">{</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">print</span><span class="o">(</span><span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getType</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="n">p</span><span class="o">.</span><span class="na">getLabel</span><span class="o">()</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" .. 
"</span> + <span class="o">+</span> <span class="o">(</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">" "</span> <span class="o">+</span> <span class="o">((</span><span class="n">p</span><span class="o">.</span><span class="na">getEnd</span><span class="o">()</span> <span class="o">-</span> <span class="n">p</span><span class="o">.</span><span class="na">getStart</span><span class="o">())</span> <span class="o">/</span> <span class="mi">1000</span><span class="o">)</span> <span class="o">+</span> <span class="s">"us"</span><span class="o">);</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">();</span> +<span class="o">}</span> +<span class="n">k</span><span class="o">.</span><span class="na">dispose</span><span class="o">();</span> +</code></pre></div> +<p>And here is the tabular output from</p> +<div class="highlight"><pre class="highlight plaintext"><code> +java + -Djava.library.path=${APARAPI_HOME} + -Dcom.aparapi.enableProfiling=true + -cp ${APARAPI_HOME}:. + MyClass + +W val$result 69500 .. 72694 3194us +X exec() 72694 .. 72835 141us +R val$result 75327 .. 78225 2898us +</code></pre></div> +<p>The table shows that the transfer of the ‘result’ buffer to the device (‘W’) took 3194 us (micro seconds), the execute (‘X’) of the kernel 141 us and the read (‘R’) of resulting buffer 2898 us.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/setting-up-hsa.html b/documentation/setting-up-hsa.html index 67b3af5d3d73fe4b8345892de75e6bc1ab9ada5e..fcfb00d98bd6c5cab0ff4db9eb7ba4e4f692c902 100644 --- a/documentation/setting-up-hsa.html +++ b/documentation/setting-up-hsa.html @@ -1,400 +1,401 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Setting Up HSA - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a 
class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Setting Up HSA</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to setup a HSA machine for testing HSA enabled Aparapi</h4> - </div> - - </div> - - </div> - <div class='container'> - <ul> - <li>HSA Videos - - <ul> - <li><a href="http://www.youtube.com/watch?v=caEPq4KvTTA" rel="nofollow">http://www.youtube.com/watch?v=caEPq4KvTTA</a></li> - </ul></li> - <li>HSA Articles - - <ul> - <li><a href="http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-computing/" rel="nofollow">http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-computing/</a></li> - </ul></li> - <li>HSA Foundation - - <ul> - <li><a href="https://github.com/HSAFoundation" rel="nofollow">https://github.com/HSAFoundation</a></li> - </ul></li> - </ul> - - <h2>Introduction</h2> - - <p>Now that HSA hardware is generally available I figured it was time to describe how to setup a HSA enabled Linux platform so that it can run Aparapi.</p> - - <p>Here is a nice introduction to HSA <a href="http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-system-architecture-hsa/" rel="nofollow">http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-system-architecture-hsa/</a></p> - - <p>But for Aparapi users the main advantage is that we are no longer limited to the GPU memory for running GPU tasks. Also because the CPU and the GPU can both see the same memory (the Java heap) Aparapi code can now access Java objects directly. This removes a number of Aparapi constraints. 
So more of your code can now run on the GPU.</p> - - <h2>Hardware Required</h2> - - <p>These instructions were based on my experience setting up a platform using the following hardware.</p> - - <table><thead> - <tr> - <th>Component</th> - <th>Suggested</th> - </tr> - </thead><tbody> - <tr> - <td>APU</td> - <td>AMD A10-7850K APU <a href="http://www.amd.com/us/products/desktop/processors/a-series/Pages/a-series-apu.aspx" rel="nofollow">http://www.amd.com/us/products/desktop/processors/a-series/Pages/a-series-apu.aspx</a></td> - </tr> - <tr> - <td>Motherboard</td> - <td>ASUS A88X-PRO or A88XM-A <a href="http://www.asus.com/Motherboards/A88XPRO" rel="nofollow">http://www.asus.com/Motherboards/A88XPRO</a> <a href="http://www.asus.com/Motherboards/A88XMA" rel="nofollow">http://www.asus.com/Motherboards/A88XMA</a></td> - </tr> - <tr> - <td>Memory</td> - <td>G.SKILL Ripjaws X Series 16GB (2 x 8GB) 240-Pin DDR3 SDRAM DDR3 2133</td> - </tr> - </tbody></table> - - <h2>Software Required</h2> - - <p>We also have some software dependencies.</p> - - <table><thead> - <tr> - <th>Component</th> - <th>Suggested</th> - </tr> - </thead><tbody> - <tr> - <td>Java 8 JDK</td> - <td><a href="http://www.oracle.com/technetwork/java/javase/downloads/ea-jsp-142245.html" rel="nofollow">http://www.oracle.com/technetwork/java/javase/downloads/ea-jsp-142245.html</a></td> - </tr> - <tr> - <td>Ubuntu 13.10 64-bit edition</td> - <td><a href="http://www.ubuntu.com/download" rel="nofollow">http://www.ubuntu.com/download</a></td> - </tr> - <tr> - <td>Ubuntu 13.10 64-bit edition HSA enabled kernel image</td> - <td><a href="https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD" rel="nofollow">https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD</a></td> - </tr> - <tr> - <td>OKRA HSA enabled runtime</td> - <td><a href="https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device" rel="nofollow">https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device</a></td> - </tr> - 
</tbody></table> - - <p>The hope is that the list of HW/SW support widens, but for early adopters this is the set of HW/SW we have been testing with.</p> - - <h1>Setting up your System</h1> - - <h2>Configure your BIOS to support IOMMU</h2> - - <p>Once you have built your AMD A10-7850K APU based system you should make sure that your system is configured to use IOMMU.</p> - - <p>Remember HSA allows the GPU and CPU cores to share the same memory. IOMMU needs to be enabled for this.</p> - - <h2>For the A88X-PRO board</h2> - - <p>For the recommended ASUS board above you will need to make sure that your BIOS is updated to version 0802. Here is a direct link to the 0802 version of the BIOS from ASUS’s site as of 2/28/2014.</p> - - <p><a href="http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88X-PRO/A88X-PRO-ASUS-0802.zip" rel="nofollow">http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88X-PRO/A88X-PRO-ASUS-0802.zip</a></p> - - <p>Once you have the latest BIOS you will need to enable IOMMU in the system BIOS. This is done using the “CPU Configuration” screen under “Advanced Mode” and then enabling IOMMU.</p> - - <h2>For the A88XM-A</h2> - - <p>You will need the 1102 (or later) version of the BIOS</p> - - <p><a href="http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88XM-A/A88XM-A-ASUS-1102.zip" rel="nofollow">http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88XM-A/A88XM-A-ASUS-1102.zip</a></p> - - <p>Once you have the latest BIOS you will need to enable IOMMU in the system BIOS. 
This is done using the “CPU Configuration” screen under “Advanced Mode” and then enabling IOMMU.</p> - - <h2>Installing Ubuntu 13.10</h2> - - <p>Once you have your BIOS setup you need to install Ubuntu <a href="http://www.ubuntu.com/download" rel="nofollow">http://www.ubuntu.com/download</a></p> - - <p>Installing HSA enabled kernel + driver - Until all of the HSA drivers and features are available in stock linux and have been pulled down into Ubuntu distro we will need a special HSA enabled kernel image.</p> - - <h2>A Ubuntu compatible kernel can be pulled from github</h2> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> - <span class="gp">$ </span>sudo apt-get install git - <span class="gp">$ </span>git clone https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD.git - </code></pre> - <p>Or you can pull the zip and unzip using curl if you don’t have git</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> - <span class="gp">$ </span>curl -L https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD/archive/master.zip > drivers.zip - <span class="gp">$ </span>unzip drivers.zip - </code></pre> - <p>This will create the following subdir on your machine</p> - <pre class="highlight plaintext"><code> - Linux-HSA-Drivers-And-Images-AMD/ - LICENSE - README.md - ubuntu13.10-based-alpha1/ - xorg.conf - linux-image-3.13.0-kfd+_3.13.0-kfd+-2_amd64.deb - </code></pre> - <p>From here we can install our new image and setup the HSA KFD (the driver for HSA) and reboot to the new kernel.</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~/Linux-HSA-Drivers-And-Images-AMD - <span class="gp">$ </span><span class="nb">echo</span> <span class="s2">"KERNEL==</span><span class="se">\"</span><span 
class="s2">kfd</span><span class="se">\"</span><span class="s2">, MODE=</span><span class="se">\"</span><span class="s2">0666</span><span class="se">\"</span><span class="s2">"</span> | sudo tee /etc/udev/rules.d/kfd.rules - <span class="gp">$ </span>sudo dpkg -i ubuntu13.10-based-alpha1/linux-image-3.13.0-kfd+_3.13.0-kfd+-2_amd64.deb - <span class="gp">$ </span>sudo cp ~/Linux-HSA-Drivers-And-Images-AMD/ubuntu13.10-based-alpha1/xorg.conf /etc/X11 - <span class="gp">$ </span>sudo reboot - </code></pre> - <h2>Installing OKRA RT</h2> - - <p>Now we need a runtime for executing HSAIL code. We share common infrastructure used by our sister OpenJDK project called Sumatra. Both Aparapi and Sumatra use OKRA to execute HSAIL code on a HSA enabled platform.</p> - - <p>We can get the latest version of OKRA (Offloadable Kernel Runtime API) from another HSA Foundation repository.</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> - <span class="gp">$ </span>git clone https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device.git - </code></pre> - <p>Or if you prefer curl/unzip</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> - <span class="gp">$ </span>curl -L https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device/archive/master.zip > okra.zip - <span class="gp">$ </span>unzip okra.zip - </code></pre> - <p>This will create the following dir structure.</p> - <pre class="highlight java"><code> - <span class="n">Okra</span><span class="o">-</span><span class="n">Interface</span><span class="o">-</span><span class="n">to</span><span class="o">-</span><span class="n">HSA</span><span class="o">-</span><span class="n">Device</span><span class="o">/</span> - <span class="n">README</span><span class="o">.</span><span class="na">md</span> - <span 
class="n">okra</span><span class="o">/</span> - <span class="n">README</span> - <span class="n">dist</span><span class="o">/</span> - <span class="n">okra</span><span class="o">.</span><span class="na">jar</span> - <span class="n">bin</span><span class="o">/</span> - <span class="n">libamdhsacl64</span><span class="o">.</span><span class="na">so</span> - <span class="n">libnewhsacore64</span><span class="o">.</span><span class="na">so</span> - <span class="n">libokra_x86_64</span><span class="o">.</span><span class="na">so</span> - <span class="n">include</span><span class="o">/</span> - <span class="n">common</span><span class="o">.</span><span class="na">h</span> - <span class="n">okraContext</span><span class="o">.</span><span class="na">h</span> - - <span class="n">samples</span><span class="o">/</span> - <span class="n">dist</span><span class="o">/</span> - <span class="n">Squares</span> - <span class="n">Squares</span><span class="o">.</span><span class="na">hsail</span> - <span class="n">runSquares</span><span class="o">.</span><span class="na">sh</span> - </code></pre> - <p>OKRA offers a C API (for those that are so inclined ;) ) as well as a java jar file which contains JNI wrappers.</p> - - <h2>Sanity check your HSA and OKRA install</h2> - - <p>So to sanity check your install you can run a small sample app (binary)</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~/Okra-Interface-to-HSA-Device/okra/samples/ - <span class="gp">$ </span>sh runSquares.sh - </code></pre> - <p>If everything is OK this should run the C Squares test app.</p> - - <p>Congratulations, you have executed your first HSA enabled app.</p> - - <p>Getting OpenCL headers and libraries - We need OpenCL headers and libraries to build Aparapi (remember we still support OpenCL).</p> - - <p>My recommendation is to download AMD-APP-SDK-v2.9-lnx64.tgz from <a 
href="http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads" rel="nofollow">http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads</a> and extract the libraries and headers.</p> - - <p>Note that we have nested gzipped tar archives in this archive.</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ - <span class="gp">$ </span>gunzip ~/Downloads/AMD-APP-SDK-v2.9-lnx64.tgz - <span class="gp">$ </span>tar xvf ~/Downloads/AMD-APP-SDK-v2.9-lnx64.tar - <span class="gp">$ </span>rm ~/default-install_lnx_64.pl ~/icd-registration.tgz ~/Install-AMD-APP.sh ~/ReadMe.txt - <span class="gp">$ </span>gunzip ~/AMD-APP-SDK-v2.9-RC-lnx64.tgz - <span class="gp">$ </span>tar xvf ~/AMD-APP-SDK-v2.9-RC-lnx64.tar - <span class="gp">$ </span>rm ~/AMD-APP-SDK-v2.9-RC-lnx64.tar - <span class="gp">$ </span>rm -rf AMD-APP-SDK-v2.9-RC-lnx64/samples - </code></pre> - <p>Note where AMD-APP-SDK-v2.9-RC-lnx64 is located; you will need this in the following step.</p> - - <h2>You will need Java 8</h2> - - <p>Download Java 8 JDK from <a href="https://jdk8.java.net/download.html" rel="nofollow">https://jdk8.java.net/download.html</a> I chose to download the zipped tar and not install with RPM so I can control the location of the install.</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ - <span class="gp">$ </span>gunzip ~/Downloads/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz - <span class="gp">$ </span>tar xvf ~/Downloads/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar - </code></pre> - <p>I now have ~/jdk1.8.0 as my java 8 install dir.</p> - - <p>Alternatively the following will pull from Oracle's site using curl</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span><span class="nb">cd</span> ~ - <span class="gp">$ </span>curl 
http://download.java.net/jdk8/archive/b132/binaries/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz?q<span class="o">=</span>download/jdk8/archive/b132/binaries/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz > jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz - <span class="gp">$ </span>gunzip jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz - <span class="gp">$ </span>tar xvf jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar - </code></pre> - <p>I now have ~/jdk1.8.0 as my java 8 install dir.</p> - - <h2>You will need g++</h2> - - <p>We use g++ to build the JNI side of Aparapi</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span>sudo apt-get install g++ - </code></pre> - <h2>Pulling the HSA enabled Aparapi branch and building</h2> - - <p>Now we can pull the Aparapi lambda/HSA branch from GIT</p> - <pre class="highlight plaintext"><code>$ sudo apt-get install git - $ git clone https://github.com/Syncleus/aparapi-lambda.git - </code></pre> - <p>If you are familiar with Aparapi structure then this tree should not be that much of a surprise but there are a few subtle changes.</p> - - <p>Specifically the build system has been changed to support OKRA, Aparapi JNI code is provided as a Java agent and the execution scripts all refer to ${APARAPI_HOME}/env.sh to setup a reasonable execution environment.</p> - - <p>You will need to edit env.sh and make sure that APARAPI_HOME, OKRA_HOME, OCL_HOME and JAVA_HOME are set correctly.</p> - - <p>Here is how I set my vars.</p> - - <table><thead> - <tr> - <th>environment variable</th> - <th>value</th> - </tr> - </thead><tbody> - <tr> - <td>JAVA_HOME</td> - <td>/home/${LOGNAME}/jdk1.8.0</td> - </tr> - <tr> - <td>OCL_HOME</td> - <td>/home/${LOGNAME}/AMD-APP-SDK-v2.9-RC-lnx64</td> - </tr> - <tr> - <td>APARAPI_HOME</td> - <td>/home/${LOGNAME}/aparapi-lambda</td> - </tr> - <tr> - <td>OKRA_HOME</td> - <td>/home/${LOGNAME}/Okra-Interface-to-HSA-Device/okra/</td> - </tr> - </tbody></table> - - <p>It is recommended (thanks 
notzed ;) ) that you test your env.sh using sh env.sh until it stops reporting errors. Once you have finished I recommend sourcing it into your current shell before building with Maven.</p> - <pre class="highlight java"><code> - <span class="err">$</span> <span class="n">cd</span> <span class="o">~/</span><span class="n">aparapi</span><span class="o">-</span><span class="n">lambda</span> - <span class="err">$</span> <span class="o">.</span> <span class="n">env</span><span class="o">.</span><span class="na">sh</span> - <span class="err">$</span> <span class="n">mvn</span> - </code></pre> - <p>If you get any problems check the env.sh vars first.</p> - - <p>If all is well you should be able to run some samples.</p> - <pre class="highlight java"><code> - <span class="err">$</span> <span class="n">cd</span> <span class="o">~/</span><span class="n">aparapi</span><span class="o">-</span><span class="n">lambda</span><span class="o">/</span><span class="n">samples</span><span class="o">/</span><span class="n">mandel</span> - <span class="err">$</span> <span class="n">sh</span> <span class="n">hsailmandel</span><span class="o">.</span><span class="na">sh</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Setting Up HSA +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Setting Up HSA</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to setup a HSA machine for testing HSA enabled Aparapi</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<ul> +<li>HSA Videos + +<ul> +<li><a href="http://www.youtube.com/watch?v=caEPq4KvTTA" rel="nofollow">http://www.youtube.com/watch?v=caEPq4KvTTA</a></li> +</ul></li> +<li>HSA Articles + +<ul> +<li><a href="http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-computing/" rel="nofollow">http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-computing/</a></li> +</ul></li> +<li>HSA Foundation + +<ul> +<li><a href="https://github.com/HSAFoundation" rel="nofollow">https://github.com/HSAFoundation</a></li> +</ul></li> +</ul> + +<h2>Introduction</h2> + +<p>Now that HSA hardware is generally available I figured it was time to describe how to setup a HSA enabled Linux platform so that it can run Aparapi.</p> + +<p>Here is a nice introduction to HSA <a href="http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-system-architecture-hsa/" rel="nofollow">http://developer.amd.com/resources/heterogeneous-computing/what-is-heterogeneous-system-architecture-hsa/</a></p> + +<p>But for Aparapi users the main advantage is that we are no longer limited to the GPU memory for running GPU tasks. Also because the CPU and the GPU can both see the same memory (the Java heap) Aparapi code can now access Java objects directly. This removes a number of Aparapi constraints. 
So more of your code can now run on the GPU.</p> + +<h2>Hardware Required</h2> + +<p>These instructions were based on my experience setting up a platform using the following hardware.</p> + +<table><thead> +<tr> +<th>Component</th> +<th>Suggested</th> +</tr> +</thead><tbody> +<tr> +<td>APU</td> +<td>AMD A10-7850K APU <a href="http://www.amd.com/us/products/desktop/processors/a-series/Pages/a-series-apu.aspx" rel="nofollow">http://www.amd.com/us/products/desktop/processors/a-series/Pages/a-series-apu.aspx</a></td> +</tr> +<tr> +<td>Motherboard</td> +<td>ASUS A88X-PRO or A88XM-A <a href="http://www.asus.com/Motherboards/A88XPRO" rel="nofollow">http://www.asus.com/Motherboards/A88XPRO</a> <a href="http://www.asus.com/Motherboards/A88XMA" rel="nofollow">http://www.asus.com/Motherboards/A88XMA</a></td> +</tr> +<tr> +<td>Memory</td> +<td>G.SKILL Ripjaws X Series 16GB (2 x 8GB) 240-Pin DDR3 SDRAM DDR3 2133</td> +</tr> +</tbody></table> + +<h2>Software Required</h2> + +<p>We also have some software dependencies.</p> + +<table><thead> +<tr> +<th>Component</th> +<th>Suggested</th> +</tr> +</thead><tbody> +<tr> +<td>Java 8 JDK</td> +<td><a href="http://www.oracle.com/technetwork/java/javase/downloads/ea-jsp-142245.html" rel="nofollow">http://www.oracle.com/technetwork/java/javase/downloads/ea-jsp-142245.html</a></td> +</tr> +<tr> +<td>Ubuntu 13.10 64-bit edition</td> +<td><a href="http://www.ubuntu.com/download" rel="nofollow">http://www.ubuntu.com/download</a></td> +</tr> +<tr> +<td>Ubuntu 13.10 64-bit edition HSA enabled kernel image</td> +<td><a href="https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD" rel="nofollow">https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD</a></td> +</tr> +<tr> +<td>OKRA HSA enabled runtime</td> +<td><a href="https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device" rel="nofollow">https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device</a></td> +</tr> +</tbody></table> + +<p>The hope is that the list of 
HW/SW support widens, but for early adopters this is the set of HW/SW we have been testing with.</p> + +<h1>Setting up your System</h1> + +<h2>Configure your BIOS to support IOMMU</h2> + +<p>Once you have built your AMD A10-7850K APU based system you should make sure that your system is configured to use IOMMU.</p> + +<p>Remember HSA allows the GPU and CPU cores to share the same memory. IOMMU needs to be enabled for this.</p> + +<h2>For the A88X-PRO board</h2> + +<p>For the recommended ASUS board above you will need to make sure that your BIOS is updated to version 0802. Here is a direct link to the 0802 version of the BIOS from ASUS’s site as of 2/28/2014.</p> + +<p><a href="http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88X-PRO/A88X-PRO-ASUS-0802.zip" rel="nofollow">http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88X-PRO/A88X-PRO-ASUS-0802.zip</a></p> + +<p>Once you have the latest BIOS you will need to enable IOMMU in the system BIOS. This is done using the “CPU Configuration” screen under “Advanced Mode” and then enabling IOMMU.</p> + +<h2>For the A88XM-A</h2> + +<p>You will need the 1102 (or later) version of the BIOS</p> + +<p><a href="http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88XM-A/A88XM-A-ASUS-1102.zip" rel="nofollow">http://dlcdnet.asus.com/pub/ASUS/mb/SocketFM2/A88XM-A/A88XM-A-ASUS-1102.zip</a></p> + +<p>Once you have the latest BIOS you will need to enable IOMMU in the system BIOS. 
This is done using the “CPU Configuration” screen under “Advanced Mode” and then enabling IOMMU.</p> + +<h2>Installing Ubuntu 13.10</h2> + +<p>Once you have your BIOS setup you need to install Ubuntu <a href="http://www.ubuntu.com/download" rel="nofollow">http://www.ubuntu.com/download</a></p> + +<h2>Installing HSA enabled kernel + driver</h2> + +<p>Until all of the HSA drivers and features are available in stock linux and have been pulled down into Ubuntu distro we will need a special HSA enabled kernel image.</p> + +<h2>A Ubuntu compatible kernel can be pulled from github</h2> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> +<span class="nv">$ </span><span class="nb">sudo </span>apt-get install git +<span class="nv">$ </span>git clone https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD.git +</code></pre></div> +<p>Or you can pull the zip and unzip using curl if you don’t have git</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> +<span class="nv">$ </span>curl <span class="nt">-L</span> https://github.com/HSAFoundation/Linux-HSA-Drivers-And-Images-AMD/archive/master.zip <span class="o">></span> drivers.zip +<span class="nv">$ </span>unzip drivers.zip +</code></pre></div> +<p>This will create the following subdir on your machine</p> +<div class="highlight"><pre class="highlight plaintext"><code> +Linux-HSA-Drivers-And-Images-AMD/ + LICENSE + README.md + ubuntu13.10-based-alpha1/ + xorg.conf + linux-image-3.13.0-kfd+_3.13.0-kfd+-2_amd64.deb +</code></pre></div> +<p>From here we can install our new image and set up the HSA KFD (the driver for HSA) and reboot to the new kernel.</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span>
~/Linux-HSA-Drivers-And-Images-AMD +<span class="nv">$ </span><span class="nb">echo</span> <span class="s2">"KERNEL==</span><span class="se">\"</span><span class="s2">kfd</span><span class="se">\"</span><span class="s2">, MODE=</span><span class="se">\"</span><span class="s2">0666</span><span class="se">\"</span><span class="s2">"</span> | <span class="nb">sudo </span>tee /etc/udev/rules.d/kfd.rules +<span class="nv">$ </span><span class="nb">sudo </span>dpkg <span class="nt">-i</span> ubuntu13.10-based-alpha1/linux-image-3.13.0-kfd+_3.13.0-kfd+-2_amd64.deb +<span class="nv">$ </span><span class="nb">sudo </span>cp ~/Linux-HSA-Drivers-And-Images-AMD/ubuntu13.10-based-alpha1/xorg.conf /etc/X11 +<span class="nv">$ </span><span class="nb">sudo </span>reboot +</code></pre></div> +<h2>Installing OKRA RT</h2> + +<p>Now we need a runtime for executing HSAIL code. We share common infrastructure used by our sister OpenJDK project called Sumatra. Both Aparapi and Sumatra use OKRA to execute HSAIL code on an HSA enabled platform.</p> + +<p>We can get the latest version of OKRA (Offloadable Kernel Runtime API) from another HSA Foundation repository.</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> +<span class="nv">$ </span>git clone https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device.git +</code></pre></div> +<p>or if you prefer curl/unzip</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ <span class="c"># I put all of this in my home dir</span> +<span class="nv">$ </span>curl <span class="nt">-L</span> https://github.com/HSAFoundation/Okra-Interface-to-HSA-Device/archive/master.zip <span class="o">></span> okra.zip +<span class="nv">$ </span>unzip okra.zip +</code></pre></div> +<p>This will create the following dir structure.</p> +<div class="highlight"><pre
class="highlight java"><code> +<span class="n">Okra</span><span class="o">-</span><span class="n">Interface</span><span class="o">-</span><span class="n">to</span><span class="o">-</span><span class="n">HSA</span><span class="o">-</span><span class="n">Device</span><span class="o">/</span> + <span class="n">README</span><span class="o">.</span><span class="na">md</span> + <span class="n">okra</span><span class="o">/</span> + <span class="n">README</span> + <span class="n">dist</span><span class="o">/</span> + <span class="n">okra</span><span class="o">.</span><span class="na">jar</span> + <span class="n">bin</span><span class="o">/</span> + <span class="n">libamdhsacl64</span><span class="o">.</span><span class="na">so</span> + <span class="n">libnewhsacore64</span><span class="o">.</span><span class="na">so</span> + <span class="n">libokra_x86_64</span><span class="o">.</span><span class="na">so</span> + <span class="n">include</span><span class="o">/</span> + <span class="n">common</span><span class="o">.</span><span class="na">h</span> + <span class="n">okraContext</span><span class="o">.</span><span class="na">h</span> + + <span class="n">samples</span><span class="o">/</span> + <span class="n">dist</span><span class="o">/</span> + <span class="n">Squares</span> + <span class="n">Squares</span><span class="o">.</span><span class="na">hsail</span> + <span class="n">runSquares</span><span class="o">.</span><span class="na">sh</span> +</code></pre></div> +<p>OKRA offers a C API (for those that are so inclined ;) ) as well as a java jar file which contains JNI wrappers.</p> + +<h2>Sanity check your HSA and OKRA install</h2> + +<p>So to sanity check your install you can run a small sample app (binary)</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~/Okra-Interface-to-HSA-Device/okra/samples/ +<span class="nv">$ </span>sh runSquares.sh +</code></pre></div> +<p>If everything is OK this should run 
the C Squares test app.</p> + +<p>Congratulations, you have executed your first HSA enabled app.</p> + +<h2>Getting OpenCL headers and libraries</h2> + +<p>We need OpenCL headers and libraries to build Aparapi (remember we still support OpenCL).</p> + +<p>My recommendation is to download AMD-APP-SDK-v2.9-lnx64.tgz from <a href="http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads" rel="nofollow">http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads</a> and extract the libraries and headers.</p> + +<p>Note that we have nested zipped tars in this archive.</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ +<span class="nv">$ </span>gunzip ~/Downloads/AMD-APP-SDK-v2.9-lnx64.tgz +<span class="nv">$ </span><span class="nb">tar </span>xvf ~/Downloads/AMD-APP-SDK-v2.9-lnx64.tar +<span class="nv">$ </span>rm ~/default-install_lnx_64.pl ~/icd-registration.tgz ~/Install-AMD-APP.sh ~/ReadMe.txt +<span class="nv">$ </span>gunzip ~/AMD-APP-SDK-v2.9-RC-lnx64.tgz +<span class="nv">$ </span><span class="nb">tar </span>xvf ~/AMD-APP-SDK-v2.9-RC-lnx64.tar +<span class="nv">$ </span>rm ~/AMD-APP-SDK-v2.9-RC-lnx64.tar +<span class="nv">$ </span>rm <span class="nt">-rf</span> AMD-APP-SDK-v2.9-RC-lnx64/samples +</code></pre></div> +<p>Note where AMD-APP-SDK-v2.9-RC-lnx64 is located; you will need this in a later step.</p> + +<h2>You will need Java 8</h2> + +<p>Download Java 8 JDK from <a href="https://jdk8.java.net/download.html" rel="nofollow">https://jdk8.java.net/download.html</a>. I chose to download the zipped tar and not install with RPM so I can control the location of the install.</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ +<span class="nv">$ </span>gunzip
~/Downloads/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz +<span class="nv">$ </span><span class="nb">tar </span>xvf ~/Downloads/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar +</code></pre></div> +<p>I now have ~/jdk1.8.0 as my java 8 install dir.</p> + +<p>Alternatively the following will pull from Oracle's site using curl</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">cd</span> ~ +<span class="nv">$ </span>curl http://download.java.net/jdk8/archive/b132/binaries/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz?q<span class="o">=</span>download/jdk8/archive/b132/binaries/jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz <span class="o">></span> jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz +<span class="nv">$ </span>gunzip jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar.gz +<span class="nv">$ </span><span class="nb">tar </span>xvf jdk-8-fcs-bin-b132-linux-x64-04_mar_2014.tar +</code></pre></div> +<p>I now have ~/jdk1.8.0 as my java 8 install dir.</p> + +<h2>You will need g++</h2> + +<p>We use g++ to build the JNI side of Aparapi</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span><span class="nb">sudo </span>apt-get install g++ +</code></pre></div> +<h2>Pulling the HSA enabled Aparapi branch and building</h2> + +<p>Now we can pull the Aparapi lambda/HSA branch from Git</p> +<div class="highlight"><pre class="highlight plaintext"><code>$ sudo apt-get install git +$ git clone https://github.com/Syncleus/aparapi-lambda.git +</code></pre></div> +<p>If you are familiar with Aparapi structure then this tree should not be that much of a surprise but there are a few subtle changes.</p> + +<p>Specifically the build system has been changed to support OKRA, Aparapi JNI code is provided as a Java agent and the execution scripts all refer to ${APARAPI_HOME}/env.sh to set up a reasonable execution environment.</p> + +<p>You will need to edit env.sh and make sure that
APARAPI_HOME, OKRA_HOME, OCL_HOME and JAVA_HOME are set correctly.</p> + +<p>Here is how I set my vars.</p> + +<table><thead> +<tr> +<th>environment variable</th> +<th>value</th> +</tr> +</thead><tbody> +<tr> +<td>JAVA_HOME</td> +<td>/home/${LOGNAME}/jdk1.8.0</td> +</tr> +<tr> +<td>OCL_HOME</td> +<td>/home/${LOGNAME}/AMD-APP-SDK-v2.9-RC-lnx64</td> +</tr> +<tr> +<td>APARAPI_HOME</td> +<td>/home/${LOGNAME}/aparapi-lambda</td> +</tr> +<tr> +<td>OKRA_HOME</td> +<td>/home/${LOGNAME}/Okra-Interface-to-HSA-Device/okra/</td> +</tr> +</tbody></table> + +<p>It is recommended (thanks notzed ;) ) that you test your env.sh using sh env.sh until it stops reporting errors. Once you have finished I recommend sourcing it into your current shell before building with Maven.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="err">$</span> <span class="n">cd</span> <span class="o">~/</span><span class="n">aparapi</span><span class="o">-</span><span class="n">lambda</span> +<span class="err">$</span> <span class="o">.</span> <span class="n">env</span><span class="o">.</span><span class="na">sh</span> +<span class="err">$</span> <span class="n">mvn</span> +</code></pre></div> +<p>If you get any problems check the env.sh vars first.</p> + +<p>If all is well you should be able to run some samples.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="err">$</span> <span class="n">cd</span> <span class="o">~/</span><span class="n">aparapi</span><span class="o">-</span><span class="n">lambda</span><span class="o">/</span><span class="n">samples</span><span class="o">/</span><span class="n">mandel</span> +<span class="err">$</span> <span class="n">sh</span> <span class="n">hsailmandel</span><span class="o">.</span><span class="na">sh</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p
class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJ
affLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/unit-tests.html b/documentation/unit-tests.html index 97225e7c12109b8a570025e95cb18c53ea6c9234..7ccc6ee7469fc9c6e3dc64e3bd46317f18a20891 100644 --- a/documentation/unit-tests.html +++ b/documentation/unit-tests.html @@ -1,284 +1,285 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Unit Tests - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' 
id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Unit Tests</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Unit test Guide Find out how to run Junit tests and how to add new tests.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h1>Unit Test Guide</h1> - - <p>The Unit Test Guide explains the test infrastructure associated with Aparapi, including instructions for executing existing tests adding new test cases. - OpenCLâ„¢ code generation tests</p> - - <p>The initial open source tree includes the codegen subdirectory (test/codegen), which used to validate the Aparapi bytecode to OpenCLâ„¢ conversion.</p> - <pre class="highlight plaintext"><code> - /src/test/java/ - com/aparapi/ - codegen/ - test/ - pom.xml - </code></pre> - <p>The code generation tests to not require OpenCLâ„¢ , AMD APP SDK or a GPU devices to be configured; these tests only validate the creation of valid OpenCLâ„¢ code by comparing against predefined expected output.</p> - - <h2>Running the OpenCLâ„¢ code generation JUnit tests</h2> - - <p>Before executing the code generation tests, build the com.aparapi sub-project and ensure that you have JUnit 4 installed.</p> - - <p>Edit the junit.jar property in test/codegen/build.xml to point to your install directory.</p> - <pre class="highlight xml"><code> - <span class="nt"><property</span> <span class="na">name=</span><span class="s">"junit.jar"</span> <span class="na">value=</span><span class="s">"C:\JUnit4.9\junit-4.9.jar"</span><span class="nt">/></span> - </code></pre> - <p>Initiate the code generation tests using ant.</p> - <pre class="highlight plaintext"><code> - C:\> cd tests/codegen - C:\> ant - <failures will be reported here> - C:> - </code></pre> - <p>View the HTML version of the JUnit report at junit/html/index.html. 
On Microsoft Windows® platforms use</p> - <pre class="highlight plaintext"><code> - C:\> start junit\html\index.html - </code></pre> - <p>On Linux® platforms just invoke your browser (Firefox in this case).</p> - <pre class="highlight plaintext"><code> - firefox junit\html\index.html - </code></pre> - <h2>Adding a new OpenCLâ„¢ code generation test</h2> - - <p>The test cases for OpenCLâ„¢ code generation are not strictly JUnit tests. Instead the codegen Java tree contains a tool (CreateJUnitTests) to create JUnit test cases from specially formatted test source files.</p> - - <p>The package <code>com.aparapi.test (codegen/src/java/com/aparapi/test)</code> contains all of the existing code generation tests.</p> - - <p>Here is an example that tests the code generation resulting from a call to Kernel.getPassId(), this is taken from com.aparapi.test.CallGetPassId</p> - <pre class="highlight java"><code> - <span class="kn">package</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">test</span><span class="o">;</span> - - <span class="kn">import</span> <span class="nn">com.aparapi.Kernel</span><span class="o">;</span> - - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">CallGetPassId</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">int</span> <span class="n">thePassId</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> - <span class="o">}</span> - - <span class="o">}</span> - <span class="cm">/**{OpenCL{ - - typedef struct This_s{ - int passid; - }This; - int get_pass_id(This *this){ - return this->passid; - } - __kernel void run( - int passid - ){ - This thisStruct; - This* this=&thisStruct; - this->passid = passid; - { - int 
thePassId = get_pass_id(this); - return; - } - } - - }OpenCL}**/</span> - </code></pre> - <p>The test source takes the form of a simple class that extends the kernel and a block of OpenCL code between the /<strong>{OpenCL{ and }OpenCL}</strong>/ markers. The code between these markers is the OpenCL code that we expect Aparapi to produce as a result of converting the run() method to OpenCL.</p> - - <p>The code-generating ant build.xml file performs the following steps to generate its report:</p> - - <ul> - <li>compiles the src/java tree. This compiles all the test cases as well as a few ‘utility’ classes.</li> - <li>executes the com.aparapi.test.CreateJUnitTests program. This iterates through all of the test source files and converts them to JUnit form. The generated source is written to the src/genjava tree.</li> - <li>compiles the src/genjava tree to create the required JUnit classes</li> - <li>initiates the JUnit test phase (result data in junit/data)</li> - <li>creates the JUnit report (in junit/html/junit from junit/data)</li> - </ul> - - <p>To create a new test case, just add your test case to the <code>codegen/src/java/com/aparapi/test</code> package (including the expected OpenCL).</p> - - <p>Sometimes different javac implementations (such as Oracle and Eclipse) will generate different bytecode for the same source. When Aparapi converts this bytecode it may yield different (but equally acceptable) OpenCL forms. 
One example of this is the BooleanToggle test:</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">BooleanToggle</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">boolean</span> <span class="n">pass</span> <span class="o">=</span> <span class="kc">false</span><span class="o">;</span> - - <span class="n">pass</span> <span class="o">=</span> <span class="o">!</span><span class="n">pass</span><span class="o">;</span> - - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>The BooleanToggle test code creates two (slightly different) versions of OpenCLâ„¢ (sadly one line different) depending on the javac compiler.</p> - - <p>This example shows the ‘toggle’ OpenCLâ„¢ created from the bytecode generated by Oracle.</p> - <pre class="highlight java"><code> - <span class="n">pass</span> <span class="o">=</span> <span class="n">pass</span><span class="o">==</span><span class="mi">1</span><span class="o">?</span><span class="mi">0</span><span class="o">:</span><span class="mi">1</span><span class="o">;</span> - </code></pre> - <p>This example shows the bytecode from Eclipse javac:</p> - <pre class="highlight java"><code> - <span class="n">pass</span> <span class="o">=</span> <span class="n">pass</span><span class="o">==</span><span class="mi">0</span><span class="o">?</span><span class="mi">1</span><span class="o">:</span><span class="mi">0</span><span class="o">;</span> - </code></pre> - <p>Logically either of the above are correct. However, to accommodate the alternate acceptable forms we need to add two complete <code>/**{OpenCL{ and }OpenCL}**/</code> sections to the file. 
If either matches, the test will pass.</p> - - <p>Here is the complete BooleanToggle code.</p> - <pre class="highlight java"><code> - <span class="kn">package</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">test</span><span class="o">;</span> - - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">BooleanToggle</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">boolean</span> <span class="n">pass</span> <span class="o">=</span> <span class="kc">false</span><span class="o">;</span> - - <span class="n">pass</span> <span class="o">=</span> <span class="o">!</span><span class="n">pass</span><span class="o">;</span> - - <span class="o">}</span> - <span class="o">}</span> - <span class="cm">/**{OpenCL{ - typedef struct This_s{ - int passid; - }This; - int get_pass_id(This *this){ - return this->passid; - } - __kernel void run( - int passid - ){ - This thisStruct; - This* this=&thisStruct; - this->passid = passid; - { - char pass = 0; - pass = (pass==0)?1:0; - return; - } - } - }OpenCL}**/</span> - <span class="cm">/**{OpenCL{ - typedef struct This_s{ - int passid; - }This; - int get_pass_id(This *this){ - return this->passid; - } - __kernel void run( - int passid - ){ - This thisStruct; - This* this=&thisStruct; - this->passid = passid; - { - char pass = 0; - pass = (pass!=0)?0:1; - return; - } - } - }OpenCL}**/</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Unit Tests +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Unit Tests</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Unit test Guide Find out how to run Junit tests and how to add new tests.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<h1>Unit Test Guide</h1> + +<p>The Unit Test Guide explains the test infrastructure associated with Aparapi, including instructions for executing existing tests and adding new test cases. +OpenCL™ code generation tests</p> + +<p>The initial open source tree includes the codegen subdirectory (test/codegen), which is used to validate the Aparapi bytecode to OpenCL™ conversion.</p> +<div class="highlight"><pre class="highlight plaintext"><code> +/src/test/java/ + com/aparapi/ + codegen/ + test/ + pom.xml +</code></pre></div> +<p>The code generation tests do not require OpenCL™, AMD APP SDK or a GPU device to be configured; these tests only validate the creation of valid OpenCL™ code by comparing against predefined expected output.</p> + +<h2>Running the OpenCL™ code generation JUnit tests</h2> + +<p>Before executing the code generation tests, build the com.aparapi sub-project and ensure that you have JUnit 4 installed.</p> + +<p>Edit the junit.jar property in test/codegen/build.xml to point to your install directory.</p> +<div class="highlight"><pre class="highlight xml"><code> +<span class="nt">&lt;property</span> <span class="na">name=</span><span class="s">"junit.jar"</span> <span class="na">value=</span><span class="s">"C:\JUnit4.9\junit-4.9.jar"</span><span class="nt">/&gt;</span> +</code></pre></div> +<p>Initiate the code generation tests using ant.</p> +<div class="highlight"><pre class="highlight plaintext"><code> +C:\> cd tests/codegen +C:\> ant +&lt;failures will be reported here&gt; +C:> +</code></pre></div> +<p>View the HTML version of the JUnit report at junit/html/index.html. 
On Microsoft Windows® platforms use</p> +<div class="highlight"><pre class="highlight plaintext"><code> +C:\> start junit\html\index.html +</code></pre></div> +<p>On Linux® platforms just invoke your browser (Firefox in this case).</p> +<div class="highlight"><pre class="highlight plaintext"><code> +firefox junit/html/index.html +</code></pre></div> +<h2>Adding a new OpenCL™ code generation test</h2> + +<p>The test cases for OpenCL™ code generation are not strictly JUnit tests. Instead the codegen Java tree contains a tool (CreateJUnitTests) to create JUnit test cases from specially formatted test source files.</p> + +<p>The package <code>com.aparapi.test (codegen/src/java/com/aparapi/test)</code> contains all of the existing code generation tests.</p> + +<p>Here is an example that tests the code generation resulting from a call to Kernel.getPassId(); this is taken from com.aparapi.test.CallGetPassId.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kn">package</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">test</span><span class="o">;</span> + +<span class="kn">import</span> <span class="nn">com.aparapi.Kernel</span><span class="o">;</span> + +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">CallGetPassId</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">int</span> <span class="n">thePassId</span> <span class="o">=</span> <span class="n">getPassId</span><span class="o">();</span> + <span class="o">}</span> + +<span class="o">}</span> +<span class="cm">/**{OpenCL{ + +typedef struct This_s{ + int passid; +}This; +int get_pass_id(This *this){ + return this->passid; +} +__kernel void run( + int passid +){ + This thisStruct; + 
This* this=&thisStruct; + this->passid = passid; + { + int thePassId = get_pass_id(this); + return; + } +} + +}OpenCL}**/</span> +</code></pre></div> +<p>The test source takes the form of a simple class that extends the kernel and a block of OpenCL code between the <code>/**{OpenCL{</code> and <code>}OpenCL}**/</code> markers. The code between these markers is the OpenCL code that we expect Aparapi to produce as a result of converting the run() method to OpenCL.</p> + +<p>The code-generating ant build.xml file performs the following steps to generate its report:</p> + +<ul> +<li>compiles the src/java tree. This compiles all the test cases as well as a few ‘utility’ classes.</li> +<li>executes the com.aparapi.test.CreateJUnitTests program. This iterates through all of the test source files and converts them to JUnit form. The generated source is written to the src/genjava tree.</li> +<li>compiles the src/genjava tree to create the required JUnit classes</li> +<li>initiates the JUnit test phase (result data in junit/data)</li> +<li>creates the JUnit report (in junit/html/junit from junit/data)</li> +</ul> + +<p>To create a new test case, just add your test case to the <code>codegen/src/java/com/aparapi/test</code> package (including the expected OpenCL).</p> + +<p>Sometimes different javac implementations (such as Oracle and Eclipse) will generate different bytecode for the same source. When Aparapi converts this bytecode it may yield different (but equally acceptable) OpenCL forms. 
One example of this is the BooleanToggle test:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">BooleanToggle</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">boolean</span> <span class="n">pass</span> <span class="o">=</span> <span class="kc">false</span><span class="o">;</span> + + <span class="n">pass</span> <span class="o">=</span> <span class="o">!</span><span class="n">pass</span><span class="o">;</span> + + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>The BooleanToggle test code creates two (slightly different) versions of OpenCL™ (sadly one line different) depending on the javac compiler.</p> + +<p>This example shows the ‘toggle’ OpenCL™ created from the bytecode generated by Oracle.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">pass</span> <span class="o">=</span> <span class="n">pass</span><span class="o">==</span><span class="mi">1</span><span class="o">?</span><span class="mi">0</span><span class="o">:</span><span class="mi">1</span><span class="o">;</span> +</code></pre></div> +<p>This example shows the OpenCL™ created from the bytecode generated by Eclipse javac:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">pass</span> <span class="o">=</span> <span class="n">pass</span><span class="o">==</span><span class="mi">0</span><span class="o">?</span><span class="mi">1</span><span class="o">:</span><span class="mi">0</span><span class="o">;</span> +</code></pre></div> +<p>Logically, either of the above is correct. However, to accommodate the alternate acceptable forms we need to add two complete <code>/**{OpenCL{ and }OpenCL}**/</code> sections to the file. 
If either matches, the test will pass.</p> + +<p>Here is the complete BooleanToggle code.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kn">package</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">test</span><span class="o">;</span> + +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">BooleanToggle</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="kt">boolean</span> <span class="n">pass</span> <span class="o">=</span> <span class="kc">false</span><span class="o">;</span> + + <span class="n">pass</span> <span class="o">=</span> <span class="o">!</span><span class="n">pass</span><span class="o">;</span> + + <span class="o">}</span> +<span class="o">}</span> +<span class="cm">/**{OpenCL{ +typedef struct This_s{ + int passid; +}This; +int get_pass_id(This *this){ + return this->passid; +} +__kernel void run( + int passid +){ + This thisStruct; + This* this=&thisStruct; + this->passid = passid; + { + char pass = 0; + pass = (pass==0)?1:0; + return; + } +} +}OpenCL}**/</span> +<span class="cm">/**{OpenCL{ +typedef struct This_s{ + int passid; +}This; +int get_pass_id(This *this){ + return this->passid; +} +__kernel void run( + int passid +){ + This thisStruct; + This* this=&thisStruct; + this->passid = passid; + { + char pass = 0; + pass = (pass!=0)?0:1; + return; + } +} +}OpenCL}**/</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/documentation/using-hsa-simulator.html b/documentation/using-hsa-simulator.html index 3c1de463804294e9574194710f83f273c91ac79c..3e9410645582939da3653b6bf29e32aacb43998f 100644 --- a/documentation/using-hsa-simulator.html +++ b/documentation/using-hsa-simulator.html @@ -1,161 +1,162 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Using HSA Simulator - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li 
class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Using HSA Simulator</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Using Aparapi lambda branch with HSA Simulator.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Introduction</h2> - - <p>Although HSA compatible devices are available, we understand that Aparapi developers may not have access to these devices.</p> - - <p>The HSA foundation has open sourced an LLVM based HSAIL emulator which we can use to test HSAIL generated code.</p> - - <p>The project is based here (<a href="https://github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator" rel="nofollow">https://github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator</a>) but we have extracted detailed download and build instructions for Ubuntu below.</p> - - <p>Aparapi users/developers can use this simulator to test correctness.</p> - - <h2>Building the HSA Simulator on Ubuntu</h2> - - <p>We assume you have ant, svn and g++ available because you can build other aparapi artifacts.</p> - - <p>You will also need git, libelf-dev, libdwarf-dev, flex and cmake</p> - <pre class="highlight java"><code> - <span class="err">$</span> <span class="n">sudo</span> <span class="n">apt</span><span class="o">-</span><span class="n">get</span> <span class="n">install</span> <span class="n">git</span> <span class="n">libelf</span><span class="o">-</span><span class="n">dev</span> <span class="n">libdwarf</span><span class="o">-</span><span class="n">dev</span> <span class="n">flex</span> <span class="n">cmake</span> - </code></pre> - <p>login…</p> - <pre class="highlight java"><code> - <span class="err">$</span> <span class="n">git</span> <span class="n">clone</span> <span class="nl">https:</span><span class="c1">//github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator.git 
okra</span> - <span class="err">$</span> <span class="n">cd</span> <span class="n">okra</span> - <span class="err">$</span> <span class="n">ant</span> <span class="o">-</span><span class="n">f</span> <span class="n">build</span><span class="o">-</span><span class="n">okra</span><span class="o">-</span><span class="n">sim</span><span class="o">.</span><span class="na">xml</span> - </code></pre> - <h2>The build should take approximately 15 mins.</h2> - - <p>How to setup and test an initial lambda/HSA enabled Aparapi build - Assuming you have built okra in /home/gfrost/okra</p> - - <p>Assuming your Java8 JDK is in /home/gfrost/jdk1.8.0</p> - - <p>Assuming your aparapi svn trunk is /home/gfrost/aparapi</p> - <pre class="highlight java"><code> - <span class="err">$</span> <span class="n">export</span> <span class="n">JAVA_HOME</span><span class="o">=/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">jdk1</span><span class="o">.</span><span class="mf">8.0</span> - <span class="err">$</span> <span class="n">export</span> <span class="n">OKRA</span><span class="o">=/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">okra</span> - <span class="err">$</span> <span class="n">export</span> <span class="n">PATH</span><span class="o">=</span><span class="err">$</span><span class="o">{</span><span class="n">PATH</span><span class="o">}:</span><span class="err">$</span><span class="o">{</span><span class="n">JAVA_HOME</span><span class="o">}/</span><span class="nl">bin:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">bin</span> - <span class="err">$</span> <span class="n">java</span> <span class="o">-</span><span class="n">version</span> - <span class="n">java</span> <span class="n">version</span> <span 
class="s">"1.8.0-ea"</span> - <span class="n">Java</span><span class="o">(</span><span class="n">TM</span><span class="o">)</span> <span class="n">SE</span> <span class="n">Runtime</span> <span class="nf">Environment</span> <span class="o">(</span><span class="n">build</span> <span class="mf">1.8</span><span class="o">.</span><span class="mi">0</span><span class="o">-</span><span class="n">ea</span><span class="o">-</span><span class="n">b94</span><span class="o">)</span> - <span class="n">Java</span> <span class="nf">HotSpot</span><span class="o">(</span><span class="n">TM</span><span class="o">)</span> <span class="mi">64</span><span class="o">-</span><span class="n">Bit</span> <span class="n">Server</span> <span class="nf">VM</span> <span class="o">(</span><span class="n">build</span> <span class="mf">25.0</span><span class="o">-</span><span class="n">b36</span><span class="o">,</span> <span class="n">mixed</span> <span class="n">mode</span><span class="o">)</span> - <span class="err">$</span> <span class="n">cd</span> <span class="o">/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">branches</span><span class="o">/</span><span class="n">lambda</span> - <span class="err">$</span> <span class="n">ant</span> - <span class="err">$</span> <span class="n">export</span> <span class="n">LD_LIBRARY_PATH</span><span class="o">=</span><span class="err">$</span><span class="o">{</span><span class="n">LD_LIBRARY_PATH</span><span class="o">}:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">bin</span> - <span class="err">$</span> <span class="n">java</span> <span class="o">-</span><span class="nl">agentpath:</span><span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span 
class="o">.</span><span class="na">jni</span><span class="o">/</span><span class="n">dist</span><span class="o">/</span><span class="n">libaparapi_x86_64</span><span class="o">.</span><span class="na">so</span> <span class="o">-</span><span class="n">cp</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">/</span><span class="n">dist</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span><span class="o">:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">okra</span><span class="o">.</span><span class="na">jar</span> <span class="n">hsailtest</span><span class="o">.</span><span class="na">Squares</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Using HSA Simulator +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Using HSA Simulator</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Using Aparapi lambda branch with HSA Simulator.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<h2>Introduction</h2> + +<p>Although HSA compatible devices are available, we understand that Aparapi developers may not have access to these devices.</p> + +<p>The HSA foundation has open sourced an LLVM based HSAIL emulator which we can use to test HSAIL generated code.</p> + +<p>The project is based here (<a href="https://github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator" rel="nofollow">https://github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator</a>) but we have extracted detailed download and build instructions for Ubuntu below.</p> + +<p>Aparapi users/developers can use this simulator to test correctness.</p> + +<h2>Building the HSA Simulator on Ubuntu</h2> + +<p>We assume you have ant, svn and g++ available because you can build other aparapi artifacts.</p> + +<p>You will also need git, libelf-dev, libdwarf-dev, flex and cmake</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="err">$</span> <span class="n">sudo</span> <span class="n">apt</span><span class="o">-</span><span class="n">get</span> <span class="n">install</span> <span class="n">git</span> <span class="n">libelf</span><span class="o">-</span><span class="n">dev</span> <span class="n">libdwarf</span><span class="o">-</span><span class="n">dev</span> <span class="n">flex</span> <span class="n">cmake</span> +</code></pre></div> +<p>login…</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="err">$</span> <span class="n">git</span> <span class="n">clone</span> <span class="nl">https:</span><span class="c1">//github.com/HSAFoundation/Okra-Interface-to-HSAIL-Simulator.git okra</span> +<span class="err">$</span> <span class="n">cd</span> <span class="n">okra</span> +<span class="err">$</span> <span class="n">ant</span> <span class="o">-</span><span class="n">f</span> <span class="n">build</span><span class="o">-</span><span class="n">okra</span><span class="o">-</span><span class="n">sim</span><span class="o">.</span><span 
class="na">xml</span> +</code></pre></div> +<p>The build should take approximately 15 mins.</p> + +<h2>How to set up and test an initial lambda/HSA enabled Aparapi build</h2> +<p>Assuming you have built okra in /home/gfrost/okra</p> + +<p>Assuming your Java8 JDK is in /home/gfrost/jdk1.8.0</p> + +<p>Assuming your aparapi svn trunk is /home/gfrost/aparapi</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="err">$</span> <span class="n">export</span> <span class="n">JAVA_HOME</span><span class="o">=/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">jdk1</span><span class="o">.</span><span class="mf">8.0</span> +<span class="err">$</span> <span class="n">export</span> <span class="n">OKRA</span><span class="o">=/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">okra</span> +<span class="err">$</span> <span class="n">export</span> <span class="n">PATH</span><span class="o">=</span><span class="err">$</span><span class="o">{</span><span class="n">PATH</span><span class="o">}:</span><span class="err">$</span><span class="o">{</span><span class="n">JAVA_HOME</span><span class="o">}/</span><span class="nl">bin:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">bin</span> +<span class="err">$</span> <span class="n">java</span> <span class="o">-</span><span class="n">version</span> +<span class="n">java</span> <span class="n">version</span> <span class="s">"1.8.0-ea"</span> +<span class="n">Java</span><span class="o">(</span><span class="n">TM</span><span class="o">)</span> <span class="n">SE</span> <span class="n">Runtime</span> <span class="nf">Environment</span> <span class="o">(</span><span class="n">build</span> <span class="mf">1.8</span><span class="o">.</span><span
class="mi">0</span><span class="o">-</span><span class="n">ea</span><span class="o">-</span><span class="n">b94</span><span class="o">)</span> +<span class="n">Java</span> <span class="nf">HotSpot</span><span class="o">(</span><span class="n">TM</span><span class="o">)</span> <span class="mi">64</span><span class="o">-</span><span class="n">Bit</span> <span class="n">Server</span> <span class="nf">VM</span> <span class="o">(</span><span class="n">build</span> <span class="mf">25.0</span><span class="o">-</span><span class="n">b36</span><span class="o">,</span> <span class="n">mixed</span> <span class="n">mode</span><span class="o">)</span> +<span class="err">$</span> <span class="n">cd</span> <span class="o">/</span><span class="n">home</span><span class="o">/</span><span class="n">gfrost</span><span class="o">/</span><span class="n">aparapi</span><span class="o">/</span><span class="n">branches</span><span class="o">/</span><span class="n">lambda</span> +<span class="err">$</span> <span class="n">ant</span> +<span class="err">$</span> <span class="n">export</span> <span class="n">LD_LIBRARY_PATH</span><span class="o">=</span><span class="err">$</span><span class="o">{</span><span class="n">LD_LIBRARY_PATH</span><span class="o">}:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">bin</span> +<span class="err">$</span> <span class="n">java</span> <span class="o">-</span><span class="nl">agentpath:</span><span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span class="o">.</span><span class="na">jni</span><span class="o">/</span><span class="n">dist</span><span class="o">/</span><span class="n">libaparapi_x86_64</span><span class="o">.</span><span class="na">so</span> <span class="o">-</span><span class="n">cp</span> <span class="n">com</span><span class="o">.</span><span class="na">aparapi</span><span 
class="o">/</span><span class="n">dist</span><span class="o">/</span><span class="n">aparapi</span><span class="o">.</span><span class="na">jar</span><span class="o">:</span><span class="err">$</span><span class="o">{</span><span class="n">OKRA</span><span class="o">}/</span><span class="n">dist</span><span class="o">/</span><span class="n">okra</span><span class="o">.</span><span class="na">jar</span> <span class="n">hsailtest</span><span class="o">.</span><span class="na">Squares</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/images/nbody_cpu.gif b/images/nbody_cpu.gif index 8af3c84713257ce7218f4e83138d9ee57de3a5dc..33c3029e09fa9c43f7540c7029d5da54f61aba9b 100644 Binary files a/images/nbody_cpu.gif and b/images/nbody_cpu.gif differ diff --git a/images/nbody_gpu.gif b/images/nbody_gpu.gif index 24e7b39169fbd938e40c0610155bb948c9dd8ca2..5ed03b20d02c42a6ed8de4c98ef4ee00ae78bb60 100644 Binary files a/images/nbody_gpu.gif and b/images/nbody_gpu.gif differ diff --git a/index.html b/index.html index 248791b59f622931bc982c36605bf8e71caa2ca2..cff8ca466c309cb5eef4d59e86b14b1e4f1526de 100644 --- a/index.html +++ b/index.html @@ -1,176 +1,180 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> 
- <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Aparapi - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect 
waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect 
waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Aparapi</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Open-source framework for executing native Java code on the GPU.</h4> - </div> - <div class='row center'> - <a class='btn-large waves-effect waves-light' href='/introduction/getting-started.html' id='download-button'>Get Started</a> - </div> - <div class='row center'> - <a class='red-text text-lighten-4' href='https://github.com/Syncleus/aparapi'>Release v1.4.0</a> - </div> - <br> - </div> - <div 
class='github-commit'> - <div class='container'> - <div class='commit'> - Latest Commit on Github: -   - <a class='sha' href=''></a> -   - <span class='date'></span> - <a class='btn-flat right grey-text text-lighten-5 waves-effect waves-light hide-on-small-only' href='https://github.com/Syncleus/aparapi' id='github-button'>Github</a> - </div> - </div> - </div> - </div> - <div class='container'> - <div class='section'> - <div class='row'> - <div class='col s12 m8 offset-m2'> - <br> - <img id='responsive-img' src='images/responsive.png'> - </div> - </div> - <div class='row'> - <h3 class='col s12 light center header'>Aparapi simplifies life for developers and reduces server costs.</h3> - </div> - <!-- Promo Section --> - <div class='row'> - <div class='col s12 m4'> - <div class='center promo'> - <i class='material-icons'>flash_on</i> - <p class='promo-caption'>Speeds up development</p> - <p class='light center'>We did most of the heavy lifting for you to provide a framework capable of executing native Java code directly on the GPU. No more writing code twice, write once run anywhere.</p> - </div> - </div> - <div class='col s12 m4'> - <div class='center promo'> - <i class='material-icons'>attach_money</i> - <p class='promo-caption'>Save Money on Servers</p> - <p class='light center'>By utilizing the GPU to run your algorithms it is possible to acheive speedups hundreds of times that of a CPU alone. This can mean the difference between needing only a single server versus an entire data center.</p> - </div> - </div> - <div class='col s12 m4'> - <div class='center promo'> - <i class='material-icons'>settings</i> - <p class='promo-caption'>Flexible</p> - <p class='light center'>The full power and flexibility of OpenCL coding is fully exposed. 
All the low level calls as well as the ability to manipulate or post kernel files are still accessible to developers.</p> - </div> - </div> - </div> - </div> - <div class='divider'></div> - <div class='section'> - <div class='row center'> - <h3 class='light header'>Aparapi Showcase</h3> - <p class='col s8 offset-s2 caption'>Checkout what people are creating with Aparapi. Get inspired by these projects and you can even submit your own projects to be showcased here.</p> - <a class='btn-large waves-effect waves-light' href='showcase.html'>Explore our Showcase</a> - </div> - </div> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Aparapi +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Aparapi</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Open-source framework for executing native Java code on the GPU.</h4> +</div> +<div class='row center'> +<a 
class='btn-large waves-effect waves-light' href='/introduction/getting-started.html' id='download-button'>Get Started</a> +</div> +<div class='row center'> +<a class='red-text text-lighten-4' href='https://github.com/Syncleus/aparapi'>Release v1.4.1</a> +</div> +<br> + +</div> +<div class='github-commit'> +<div class='container'> +<div class='commit'> +Latest Commit on Github: +  +<a class='sha' href=''></a> +  +<span class='date'></span> +<a class='btn-flat right grey-text text-lighten-5 waves-effect waves-light hide-on-small-only' href='https://github.com/Syncleus/aparapi' id='github-button'>Github</a> +</div> +</div> +</div> + +</div> +<div class='container'> +<div class='section'> +<div class='row'> +<div class='col s12 m8 offset-m2'> +<br> +<img id='responsive-img' src='images/responsive.png'> +</div> +</div> +<div class='row'> +<h3 class='col s12 light center header'>Aparapi simplifies life for developers and reduces server costs.</h3> +</div> +<!-- Promo Section --> +<div class='row'> +<div class='col s12 m4'> +<div class='center promo'> +<i class='material-icons'>flash_on</i> +<p class='promo-caption'>Speeds up development</p> +<p class='light center'>We did most of the heavy lifting for you to provide a framework capable of executing native Java code directly on the GPU. No more writing code twice, write once run anywhere.</p> +</div> +</div> +<div class='col s12 m4'> +<div class='center promo'> +<i class='material-icons'>attach_money</i> +<p class='promo-caption'>Save Money on Servers</p> +<p class='light center'>By utilizing the GPU to run your algorithms it is possible to achieve speedups hundreds of times that of a CPU alone. 
This can mean the difference between needing only a single server versus an entire data center.</p> +</div> +</div> +<div class='col s12 m4'> +<div class='center promo'> +<i class='material-icons'>settings</i> +<p class='promo-caption'>Flexible</p> +<p class='light center'>The full power and flexibility of OpenCL coding is fully exposed. All the low-level calls as well as the ability to manipulate or post kernel files are still accessible to developers.</p> +</div> +</div> +</div> +</div> +<div class='divider'></div> +<div class='section'> +<div class='row center'> +<h3 class='light header'>Aparapi Showcase</h3> +<p class='col s8 offset-s2 caption'>Check out what people are creating with Aparapi. Get inspired by these projects and you can even submit your own projects to be showcased here.</p> +<a class='btn-large waves-effect waves-light' href='showcase.html'>Explore our Showcase</a> +</div> +</div> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/introduction/about.html b/introduction/about.html index 0d3c62e15f28c86fa299cc86d6147ce160b02d76..73e4e15206d3749feae80da880bacefb4237610d 100644 --- a/introduction/about.html +++ b/introduction/about.html @@ -1,140 +1,142 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | About - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object 
data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple 
Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' 
id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>About</h1> - <div class='row center'> - </div> - - </div> - - </div> - <div class='container'> - <h1>About the Name</h1> - - <p>Aparapi is just a contraction of “A PARallel API”</p> - - <p>However… “Apa rapi” in Indonesian (the language spoken on the island of Java) translates to “What a neat…”. So “Apa rapi Java Project” translates to “What a neat Java Project” How cool is that?</p> - - <h1>In the News</h1> - - <ul> - <li><a href="http://docplayer.net/4108807-Cloud-computing-cc-session-session-title-speaker-cc-4022.html" rel="nofollow">GPU Acceleration of Interactive Large Scale Data Analytics Utilizing The Aparapi Framework</a></li> - <li>“Aparapi: OpenCL GPU and Multi-Core CPU Heterogeneous Computing for Java” - Ryan LaMothe and Gary Frost - AFDS</li> - <li><a href="https://youtu.be/o0ZYz9lrp_Y" rel="nofollow">Performance Evaluation of AMD-APARAPI Using Real World Applications</a></li> - <li><a href="http://conferences.oreilly.com/oscon/oscon2012/public/schedule/detail/23434" rel="nofollow">Aparapi: An Open Source tool for extending the Java promise of ‘Write Once Run Anywhere’ to include the GPU</a></li> - </ul> - - <h1>Similar Work</h1> - - <ul> - <li>Peter Calvert’s java-GPU has similar goals and offers a mechanism for converting Java code for use on the GPU</li> - <li>Check out Peter’s dissertation “Parallelisation of Java for Graphics Processors” which can be found here</li> - <li>Marco Hutter’s Java bindings for CUDA</li> - <li>Marco Hutter’s Java bindings for OpenCL</li> - <li>Ian Wetherbee’s Java acceleration project - creates accelerated code from Java (currently C code and native Android - but CUDA creation planned)</li> - <li>“Rootbeer: Seamlessly using GPUs from Java” by Philip C. 
Pratt-Szeliga</li> - </ul> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJ
AoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | About +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>About</h1> +<div class='row center'> +</div> + +</div> + +</div> +<div class='container'> +<h1>About the Name</h1> + +<p>Aparapi is just a contraction of “A PARallel API”</p> + +<p>However… “Apa 
rapi” in Indonesian (the language spoken on the island of Java) translates to “What a neat…”. So “Apa rapi Java Project” translates to “What a neat Java Project”. How cool is that?</p> + +<h1>In the News</h1> + +<ul> +<li><a href="http://docplayer.net/4108807-Cloud-computing-cc-session-session-title-speaker-cc-4022.html" rel="nofollow">GPU Acceleration of Interactive Large Scale Data Analytics Utilizing The Aparapi Framework</a></li> +<li>“Aparapi: OpenCL GPU and Multi-Core CPU Heterogeneous Computing for Java” - Ryan LaMothe and Gary Frost - AFDS</li> +<li><a href="https://youtu.be/o0ZYz9lrp_Y" rel="nofollow">Performance Evaluation of AMD-APARAPI Using Real World Applications</a></li> +<li><a href="http://conferences.oreilly.com/oscon/oscon2012/public/schedule/detail/23434" rel="nofollow">Aparapi: An Open Source tool for extending the Java promise of ‘Write Once Run Anywhere’ to include the GPU</a></li> +</ul> + +<h1>Similar Work</h1> + +<ul> +<li>Peter Calvert’s java-GPU has similar goals and offers a mechanism for converting Java code for use on the GPU</li> +<li>Check out Peter’s dissertation “Parallelisation of Java for Graphics Processors” which can be found here</li> +<li>Marco Hutter’s Java bindings for CUDA</li> +<li>Marco Hutter’s Java bindings for OpenCL</li> +<li>Ian Wetherbee’s Java acceleration project - creates accelerated code from Java (currently C code and native Android - but CUDA creation planned)</li> +<li>“Rootbeer: Seamlessly using GPUs from Java” by Philip C. Pratt-Szeliga</li> +</ul> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/introduction/faq.html b/introduction/faq.html index 4e58943233d80d6082e77f0d4cfc092af19c09d4..73054ae250d5f71178e02927ee06ab7f5ede67f0 100644 --- a/introduction/faq.html +++ b/introduction/faq.html @@ -1,251 +1,253 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | FAQ - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object 
data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple 
Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' 
id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>FAQ</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Frequently Asked Questions.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p><strong>Frequently Asked Questions</strong></p> - - <p><strong>Why is this project called Aparapi and how is it pronounced?</strong></p> - - <p>Aparapi is just a contraction of A PAR{allel} API and is pronounced (ap-per-rap-ee).</p> - - <p><strong>Does Aparapi only work with AMD graphics cards?</strong></p> - - <p>No. Aparapi has been tested with AMD’s OpenCL enabled drivers and devices as well as a limited set of NVidia devices and drivers on Windows, Linux and Mac OSX platforms. The minimal requirement at runtime is OpenCL 1.1. If you have a compatible OpenCL 1.1 runtime and supported devices Aparapi should work.</p> - - <p>Although the build is currently configured for AMD APP SDK, OpenCL is an open standard and we look forward to contributions which will allow Aparapi to be built against other OpenCL SDK’s.</p> - - <p>Note that dll’s built using AMD APP SDK will work on other platforms at runtime. So the binary builds are expected to work on all OpenCL 1.1 platforms.</p> - - <p>Witold Bolt has kindly supplied the patches to allow Mac OS support. The Mac OS build will run against OpenCL 1.1 and 1.0 runtimes, but we won’t fix any issues reported against OpenCL 1.0; your code may run, or it may not.</p> - - <p>Aparapi may be used in JTP (Java Thread Pool) mode on any platform supported by Oracle®’s JDK.</p> - - <p><strong>Does Aparapi only support AMD CPUs?</strong></p> - - <p>No, there is nothing restricting Aparapi to AMD CPUs. 
The JNI code that we use may run on any x86/x64 machine provided there is a compatible Java Virtual Machine® JVM implementation for your platform.</p> - - <p><strong>Will there be an Aparapi-like translator for .NET?</strong></p> - - <p>This is still an early technology and Aparapi is currently focused on Java® enablement. There are similar projects targeting .NET (see www.tidepowerd.com).</p> - - <p><strong>How can I profile the OpenCL kernels that Aparapi generates? Can I get details on the latency of my kernel request? How do I optimize my kernel?</strong></p> - - <p>AMD offers the ‘AMD APP Profiler’ which can be used to profile the kernels. With Aparapi, we recommend using the command line mode of the profiler, which is described in the release notes. Using the ‘AMD APP Profiler’ you can see how much time is taken by each kernel execution and buffer transfer. Also, in each kernel, you can get more detailed information on things like memory reads and writes, and other useful data.</p> - - <p><strong>Can I have multiple threads all using the GPU compute capabilities?</strong></p> - - <p>Yes. There might be a performance impact if the device becomes a bottleneck. However, OpenCL and your GPU driver are designed to coordinate the various threads of execution.</p> - - <p><strong>Can I make method calls from the run method?</strong></p> - - <p>You can generally only make calls to other methods declared in the same class as the initial run() method. Aparapi will follow this call chain to try to determine whether it can create OpenCL. If, for example, Aparapi encounters System.out.println(“Hello World”) (a call to a method not in the user’s Kernel class) it will detect this and refuse to consider the call chain as an OpenCL candidate.</p> - - <p>One exception to this rule allows a kernel to access or mutate the state of objects held in simple arrays via their setters/getters. 
For example a kernel can include :-</p> - <pre class="highlight java"><code> - <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setValue</span><span class="o">(</span><span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">getValue</span><span class="o">()*</span><span class="mi">5</span><span class="o">);</span> - </code></pre> - <p><strong>Does Aparapi support vectorized types?</strong></p> - - <p>Due to Java’s lack of vector types (float4 for example) Aparapi can’t directly use them. Also, due to Java’s lack of operator overloading, simulating these with Java abstracts could lead to very complex and unwieldy code.</p> - - <p><strong>Is there a way I can see the generated OpenCL?</strong></p> - - <p>Yes, by adding -Dcom.aparapi.enableShowGeneratedOpenCL=true to your command line when you start your JVM.</p> - - <p><strong>Does Aparapi support sharing buffers with JOGL? Can I exploit the features of JOGAMP/glugen?</strong></p> - - <p>Rather than only supporting display-oriented compute, we are pursuing general data parallel compute. Therefore, we have chosen not to bind Aparapi too closely with JOGL.</p> - - <p><strong>What is the performance delta from handcrafted OpenCL?</strong></p> - - <p>This depends heavily on the application. 
Although we can currently show 20x performance improvement on some compute intensive Java applications compared with the same algorithm using a Java Thread Pool, a developer who is prepared to handcraft and hand-tune OpenCL and write custom host code in C/C++ is likely to see better performance than Aparapi may achieve.</p> - - <p>We understand that some users may use Aparapi as a gateway technology to test their Java code before porting to hand-crafted/tuned OpenCL.</p> - - <p><strong>Are you working with Project Lambda for offloading/parallelizing suitable work?</strong></p> - - <p>We are following the progress of Project Lambda (currently scheduled for inclusion in Java 8) and would like to be able to leverage Lambda expression format in Aparapi, but none exists now.</p> - - <p><strong>Can I select a specific GPU if I have more than one card?</strong></p> - - <p>Under review. At present, Aparapi just looks for the first AMD GPU (or APU) device. If the community has feedback on its preference, let us know.</p> - - <p><strong>Can I get the demos/samples presented at JavaOne or AFDS?</strong></p> - - <p>The Squares and Mandelbrot sample code is included in the binary download of Aparapi. The NBody source is not included in the binary (because of a dependency on JOGL). 
We have, however, included the NBody code as an example project in the Open Source tree (code.google.com/p/aparapi) and we provide details on how to install the appropriate JOGL components.</p> - - <p><strong>Can Mersenne twister be ported as a random number function inside the kernel class?</strong></p> - - <p>You can elect to implement your own Mersenne twister and use it in your own derived Kernel.</p> - - <p><strong>Does Aparapi use JNI?</strong></p> - - <p>Yes, we do ship a small JNI shim to handle the host OpenCL calls.</p> - - <p><strong>How can I confirm that my code is actually executing on the GPU?</strong></p> - - <p>From within the Java code itself you can query the execution mode after Kernel.execute(n) has returned.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="o">}</span> - <span class="o">}</span> <span class="o">;</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="err">“</span><span class="n">Execution</span> <span class="n">mode</span> <span class="o">=</span> <span class="err">“</span><span class="o">+</span><span class="n">kernel</span><span class="o">.</span><span class="na">getExecutionMode</span><span class="o">());</span> - </code></pre> - <p>The above code fragment will print either ‘GPU’ if the kernel executed on the GPU or ‘JTP’ if Aparapi executed the Kernel in a Java Thread Pool.</p> - - <p>Alternatively, 
setting the property -Dcom.aparapi.enableShowExecutionModes=true when you start your JVM will cause Aparapi to automatically report the execution mode of all kernels to stdout.</p> - - <p><strong>Why does Aparapi need me to compile my code with -g?</strong></p> - - <p>Aparapi extracts most of the information required to create OpenCL from the bytecode of your Kernel.run() (and run-reachable) methods. We use the debug information to re-create the original variable names and to determine the local variable scope information.</p> - - <p>Of course only the derived Kernel class (or accessed Objects using the new Arrays of Objects feature) need to be compiled using -g.</p> - - <p><strong>Why does the Aparapi documentation suggest I use Oracle’s JDK/JRE? Why can’t I use any JVM/JDK?</strong></p> - - <p>The documentation suggests using Oracle’s JDK/JRE for coverage reasons and not as a requirement. AMD focused its testing on Oracle’s JVM/JDK.</p> - - <p>There are two parts to this.</p> - - <ol> - <li>Our bytecode to OpenCL engine is somewhat tuned to the bytecode structures created by javac supplied by Oracle®. Specifically, there are some optimizations that other javac implementations might perform that Aparapi won’t recognize. Eclipse (for example) does not presently use Oracle’s javac, and so we do have some experience handling Eclipse specific bytecode patterns.</li> - <li>At runtime, we piggyback on the (aptly named) sun.misc.Unsafe class, which is included in rt.jar from Oracle®. This class is useful because it helps us avoid some JNI calls by providing low level routines for accessing object field addresses (in real memory) and useful routines for Atomic operations. 
All accesses to ‘sun.misc.Unsafe’ are handled by an Aparapi class called UnsafeWrapper with the intent that this could be refactored to avoid this dependency.</li> - </ol> - - <p><strong>I am using a dynamic language (Clojure, Scala, Groovy, Beanshell, etc) will I be able to use Aparapi?</strong></p> - - <p>No.</p> - - <p>To access the bytecode for a method Aparapi needs to parse the original class file. For Java code, Aparapi can use something like the code below to reload the class file bytes and parse the constant pool, attributes, fields, methods and method bytecode.</p> - <pre class="highlight java"><code> - <span class="n">YourClass</span><span class="o">.</span><span class="na">getClassLoader</span><span class="o">().</span><span class="na">loadAsResource</span><span class="o">(</span><span class="n">YourClass</span><span class="o">.</span><span class="na">getName</span><span class="o">()+</span><span class="s">".class"</span><span class="o">))</span> - </code></pre> - <p>It is unlikely that this process would work with a dynamically created class based on the presumption that dynamic languages employ some form of custom classloader to make dynamically generated bytecode available to the JVM. Therefore, it is unlikely that these classloaders would yield the classfile bytes. However, we encourage contributors to investigate opportunities here. Even if the class bytes were loadable, Aparapi would also expect debug information to be available (see previous FAQ entry). Again, this is not impossible for a dynamic language to do, indeed it would probably even be desirable as it would allow the code to be debugged using JDB compatible debugger.</p> - - <p>Finally, Aparapi recognizes bytecode patterns created by the javac supplied by Oracle® and it is possible that the code generated by a particular dynamic language may not be compatible with Aparapi current code analyzer.</p> - - <p>Therefore, at present this is unlikely to work. 
However, these would be excellent contributions to Aparapi. It would be great to see Aparapi being adopted by other JVM based dynamic languages.</p> - - <p><strong>Why does Aparapi seem to be copying data unnecessarily back and forth between host and GPU? Can I stop Aparapi from doing this?</strong></p> - - <p>Aparapi ensures that required data is moved to the GPU prior to kernel execution and returned to the appropriate array before Java execution resumes. Generally, this is what the Java user will expect. However, for some code patterns where multiple Kernel.execute() calls are made in succession (or more likely in a tight loop) Aparapi’s approach may not be optimal.</p> - - <p>In the NewFeatures page we discuss a couple of Aparapi enhancements which allow developers to elect to intervene to reduce unnecessary copies.</p> - - <p><strong>Do I have to refactor my code to use arrays of primitives? Why can’t Aparapi just work with Java Objects?</strong></p> - - <p>Aparapi creates OpenCL from the bytecode. Generally, OpenCL constrains us to using parallel primitive arrays (OpenCL does indeed allow structs, but Java and OpenCL do not have comparable memory layouts for these structures). Therefore, you will probably need to refactor your code to use primitive arrays. In this initial contribution, we have included some limited support for Arrays of simple Objects and hope contributors extend them. Check the NewFeatures page which shows how you can use this feature.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | FAQ +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> 
+</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li 
class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>FAQ</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Frequently Asked Questions.</h4> +</div> + +</div> + +</div> +<div class='container'> +<p><strong>Frequently Asked Questions</strong></p> + 
+<p><strong>Why is this project called Aparapi and how is it pronounced?</strong></p> + +<p>Aparapi is just a contraction of A PAR{allel} API and is pronounced (ap-per-rap-ee).</p> + +<p><strong>Does Aparapi only work with AMD graphics cards?</strong></p> + +<p>No. Aparapi has been tested with AMD’s OpenCL enabled drivers and devices as well as a limited set of NVidia devices and drivers on Windows, Linux and Mac OSX platforms. The minimal requirement at runtime is OpenCL 1.1. If you have a compatible OpenCL 1.1 runtime and supported devices Aparapi should work.</p> + +<p>Although the build is currently configured for AMD APP SDK, OpenCL is an open standard and we look forward to contributions which will allow Aparapi to be built against other OpenCL SDK’s.</p> + +<p>Note that dll’s built using AMD APP SDK will work on other platforms at runtime. So the binary builds are expected to work on all OpenCL 1.1 platforms.</p> + +<p>Witold Bolt has kindly supplied the patches to allow Mac OS support. The Mac OS build will run against OpenCL 1.1 and 1.0 runtimes, but we won’t fix any issues reported against the OpenCL 1.0 runtime; your code may run, or may not.</p> + +<p>Aparapi may be used in JTP (Java Thread Pool) mode on any platform supported by Oracle®’s JDK.</p> + +<p><strong>Does Aparapi only support AMD CPUs?</strong></p> + +<p>No, there is nothing restricting Aparapi to AMD CPUs. The JNI code that we use may run on any x86/x64 machine provided there is a compatible Java Virtual Machine® JVM implementation for your platform.</p> + +<p><strong>Will there be an Aparapi-like translator for .NET?</strong></p> + +<p>This is still an early technology and Aparapi is currently focused on Java® enablement. There are similar projects targeting .NET (See www.tidepowerd.com)</p> + +<p><strong>How can I profile the OpenCL kernels that Aparapi generates?
Can I get details on the latency of my kernel request? How do I optimize my kernel?</strong></p> + +<p>AMD offers the ‘AMD APP Profiler’ which can be used to profile the kernels. With Aparapi, we recommend using the command line mode of the profiler, which is described in the release notes. Using the ‘AMD APP Profiler’ you can see how much time is taken by each kernel execution and buffer transfer. Also, in each kernel, you can get more detailed information on things like memory reads and writes, and other useful data.</p> + +<p><strong>Can I have multiple threads all using the GPU compute capabilities?</strong></p> + +<p>Yes. There might be a performance impact if the device becomes a bottleneck. However, OpenCL and your GPU driver are designed to coordinate the various threads of execution.</p> + +<p><strong>Can I make method calls from the run method?</strong></p> + +<p>You can generally only make calls to other methods declared in the same class as the initial run() method. Aparapi will follow this call chain to try to determine whether it can create OpenCL. If, for example, Aparapi encounters System.out.println(“Hello World”) (a call to a method not in the user’s Kernel class) it will detect this and refuse to consider the call chain as an OpenCL candidate.</p> + +<p>One exception to this rule allows a kernel to access or mutate the state of objects held in simple arrays via their setters/getters.
For example a kernel can include :-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">setValue</span><span class="o">(</span><span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">].</span><span class="na">getValue</span><span class="o">()*</span><span class="mi">5</span><span class="o">);</span> +</code></pre></div> +<p><strong>Does Aparapi support vectorized types?</strong></p> + +<p>Due to Java’s lack of vector types (float4 for example) Aparapi can’t directly use them. Also, due to Java’s lack of operator overloading, simulating these with Java abstracts could lead to very complex and unwieldy code.</p> + +<p><strong>Is there a way I can see the generated OpenCL?</strong></p> + +<p>Yes, by adding -Dcom.aparapi.enableShowGeneratedOpenCL=true to your command line when you start your JVM.</p> + +<p><strong>Does Aparapi support sharing buffers with JOGL? Can I exploit the features of JOGAMP/gluegen?</strong></p> + +<p>Rather than only supporting display-oriented compute, we are pursuing general data parallel compute. Therefore, we have chosen not to bind Aparapi too closely with JOGL.</p> + +<p><strong>What is the performance delta from handcrafted OpenCL?</strong></p> + +<p>This depends heavily on the application.
Although we can currently show a 20x performance improvement on some compute intensive Java applications compared with the same algorithm using a Java Thread Pool, a developer who is prepared to handcraft and hand-tune OpenCL and write custom host code in C/C++ is likely to see better performance than Aparapi may achieve.</p> + +<p>We understand that some users may use Aparapi as a gateway technology to test their Java code before porting to hand-crafted/tuned OpenCL.</p> + +<p><strong>Are you working with Project Lambda for offloading/parallelizing suitable work?</strong></p> + +<p>We are following the progress of Project Lambda (currently scheduled for inclusion in Java 8) and would like to be able to leverage Lambda expression format in Aparapi, but none exists now.</p> + +<p><strong>Can I select a specific GPU if I have more than one card?</strong></p> + +<p>Under review. At present, Aparapi just looks for the first AMD GPU (or APU) device. If the community has feedback on its preference, let us know.</p> + +<p><strong>Can I get the demos/samples presented at JavaOne or ADFS?</strong></p> + +<p>The Squares and Mandelbrot sample code is included in the binary download of Aparapi. The NBody source is not included in the binary (because of a dependency on JOGL).
We have, however, included the NBody code as an example project in the Open Source tree (code.google.com/p/aparapi) and we provide details on how to install the appropriate JOGL components.</p> + +<p><strong>Can Mersenne twister be ported as a random number function inside the kernel class?</strong></p> + +<p>You can elect to implement your own Mersenne twister and use it in your own derived Kernel.</p> + +<p><strong>Does Aparapi use JNI?</strong></p> + +<p>Yes, we do ship a small JNI shim to handle the host OpenCL calls.</p> + +<p><strong>How can I confirm that my code is actually executing on the GPU?</strong></p> + +<p>From within the Java code itself you can query the execution mode after Kernel.execute(n) has returned.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="o">}</span> +<span class="o">}</span> <span class="o">;</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> +<span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="err">“</span><span class="n">Execution</span> <span class="n">mode</span> <span class="o">=</span> <span class="err">“</span><span class="o">+</span><span class="n">kernel</span><span class="o">.</span><span class="na">getExecutionMode</span><span class="o">());</span> +</code></pre></div> +<p>The above code fragment will print either ‘GPU’ if the kernel executed on the GPU or ‘JTP’ if Aparapi executed the Kernel in a Java Thread Pool.</p> +
+<p>Alternatively, setting the property -Dcom.aparapi.enableShowExecutionModes=true when you start your JVM will cause Aparapi to automatically report the execution mode of all kernels to stdout.</p> + +<p><strong>Why does Aparapi need me to compile my code with -g?</strong></p> + +<p>Aparapi extracts most of the information required to create OpenCL from the bytecode of your Kernel.run() (and run-reachable) methods. We use the debug information to re-create the original variable names and to determine the local variable scope information.</p> + +<p>Of course only the derived Kernel class (or accessed Objects using the new Arrays of Objects feature) need to be compiled using -g.</p> + +<p><strong>Why does the Aparapi documentation suggest I use Oracle’s JDK/JRE? Why can’t I use any JVM/JDK?</strong></p> + +<p>The documentation suggests using Oracle’s JDK/JRE for coverage reasons and not as a requirement. AMD focused its testing on Oracle’s JVM/JDK.</p> + +<p>There are two parts to this.</p> + +<ol> +<li>Our bytecode to OpenCL engine is somewhat tuned to the bytecode structures created by javac supplied by Oracle®. Specifically, there are some optimizations that other javac implementations might perform that Aparapi won’t recognize. Eclipse (for example) does not presently use Oracle’s javac, and so we do have some experience handling Eclipse specific bytecode patterns.</li> +<li>At runtime, we piggyback on the (aptly named) sun.misc.Unsafe class, which is included in rt.jar from Oracle®. This class is useful because it helps us avoid some JNI calls by providing low level routines for accessing object field addresses (in real memory) and useful routines for Atomic operations.
All accesses to ‘sun.misc.Unsafe’ are handled by an Aparapi class called UnsafeWrapper with the intent that this could be refactored to avoid this dependency.</li> +</ol> + +<p><strong>I am using a dynamic language (Clojure, Scala, Groovy, Beanshell, etc.). Will I be able to use Aparapi?</strong></p> + +<p>No.</p> + +<p>To access the bytecode for a method Aparapi needs to parse the original class file. For Java code, Aparapi can use something like the code below to reload the class file bytes and parse the constant pool, attributes, fields, methods and method bytecode.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">YourClass</span><span class="o">.</span><span class="na">getClassLoader</span><span class="o">().</span><span class="na">loadAsResource</span><span class="o">(</span><span class="n">YourClass</span><span class="o">.</span><span class="na">getName</span><span class="o">()+</span><span class="s">".class"</span><span class="o">)</span> +</code></pre></div> +<p>It is unlikely that this process would work with a dynamically created class based on the presumption that dynamic languages employ some form of custom classloader to make dynamically generated bytecode available to the JVM. Therefore, it is unlikely that these classloaders would yield the classfile bytes. However, we encourage contributors to investigate opportunities here. Even if the class bytes were loadable, Aparapi would also expect debug information to be available (see previous FAQ entry). Again, this is not impossible for a dynamic language to do, indeed it would probably even be desirable as it would allow the code to be debugged using a JDB compatible debugger.</p> + +<p>Finally, Aparapi recognizes bytecode patterns created by the javac supplied by Oracle® and it is possible that the code generated by a particular dynamic language may not be compatible with Aparapi’s current code analyzer.</p> + +<p>Therefore, at present this is unlikely to work.
However, these would be excellent contributions to Aparapi. It would be great to see Aparapi being adopted by other JVM based dynamic languages.</p> + +<p><strong>Why does Aparapi seem to be copying data unnecessarily back and forth between host and GPU? Can I stop Aparapi from doing this?</strong></p> + +<p>Aparapi ensures that required data is moved to the GPU prior to kernel execution and returned to the appropriate array before Java execution resumes. Generally, this is what the Java user will expect. However, for some code patterns where multiple Kernel.execute() calls are made in succession (or more likely in a tight loop) Aparapi’s approach may not be optimal.</p> + +<p>In the NewFeatures page we discuss a couple of Aparapi enhancements which will allow developers to elect to intervene to reduce unnecessary copies.</p> + +<p><strong>Do I have to refactor my code to use arrays of primitives? Why can’t Aparapi just work with Java Objects?</strong></p> + +<p>Aparapi creates OpenCL from the bytecode. Generally, OpenCL constrains us to using parallel primitive arrays (OpenCL does indeed allow structs, but Java and OpenCL do not have comparable memory layouts for these structures). Therefore, you will probably need to refactor your code to use primitive arrays. In this initial contribution, we have included some limited support for Arrays of simple Objects and hope contributors extend them. Check the NewFeatures page which shows how you can use this feature.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job.
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/introduction/getting-started.html b/introduction/getting-started.html index 8b3e285245d77951b69872e92dedc547a43938c8..bb851fb28cdced89ca4283ee736291f8492604b1 100644 --- a/introduction/getting-started.html +++ b/introduction/getting-started.html @@ -1,203 +1,204 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Getting Started - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a 
class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Getting Started</h1> - <div class='row center'> - <h4 class='header col s12 light center'>A framework for executing native Java code on the GPU.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p><strong>Licensed under the Apache Software License v2</strong></p> - - <p>Aparapi allows developers to write native Java code capable of being executed directly on a graphics card GPU by converting Java byte code to an OpenCL kernel dynamically at runtime. Because it is backed by OpenCL Aparapi is compatible with all OpenCL compatible Graphics Cards.</p> - - <p>A GPU has a unique architecture that causes them to behave differently than a CPU. One of the most noticeable differences is that while a typical CPU has less than a dozen cores a high end GPU may have hundreds of cores. This makes them uniquely suited for data-parallel computation that can result in speedups hundreds of times more than what is capable with your average CPU. This can mean the difference between needing a whole data center to house your application versus just one or two computers, potentially saving millions in server costs.</p> - - <p>Aparapi was originally a project conceived and developed by AMD corporation. It was later abandoned by AMD and sat mostly idle for several years. Despite this there were some failed efforts by the community to keep the project alive, but without a clear community leader no new releases ever came. Eventually we came along and rescued the project. Finally after such a long wait the first Aparapi release in 5 years was published and the community continues to push forward with renewed excitement.</p> - - <p>Below you will find two side-by-side comparisons for the nbody problem on a CPU vs a GPU. 
The simulation is being run on an inexpensive graphics card; you can even run it yourself from the <a href="https://github.com/Syncleus/aparapi-examples" rel="nofollow">examples project</a>. Its obvious the drastic performance gains that can be acheived with Aparapi.</p> - - <p><img src="http://aparapi.com/images/nbody_gpu.gif" alt="NBody GPU" /> <img src="http://aparapi.com/images/nbody_cpu.gif" alt="NBody CPU" /></p> - - <h2>Support and Documentation</h2> - - <p>Aparapi Javadocs: <a href="http://www.javadoc.io/doc/com.aparapi/aparapi" rel="nofollow">latest</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.4.0" rel="nofollow">1.4.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.4" rel="nofollow">1.3.4</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.3" rel="nofollow">1.3.3</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.2" rel="nofollow">1.3.2</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.1" rel="nofollow">1.3.1</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.0" rel="nofollow">1.3.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.2.0" rel="nofollow">1.2.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.2" rel="nofollow">1.1.2</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.1" rel="nofollow">1.1.1</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.0" rel="nofollow">1.1.0</a> - <a href="http://www.javadoc.io/doc/com.syncleus.aparapi/aparapi/1.0.0" rel="nofollow">1.0.0</a></p> - - <p>For detailed documentation see <a href="http://Aparapi.com" rel="nofollow">Aparapi.com</a> or check out the <a href="http://www.javadoc.io/doc/com.aparapi/aparapi" rel="nofollow">latest Javadocs</a>.</p> - - <p>For support please use <a href="https://gitter.im/Syncleus/aparapi" rel="nofollow">Gitter</a> or the <a href="https://groups.google.com/d/forum/aparapi" rel="nofollow">official Aparapi mailing list</a>.</p> - - <p>Please 
file bugs and feature requests on <a href="https://github.com/Syncleus/aparapi/issues" rel="nofollow">Github</a>.</p> - - <p>Aparapi conforms to the <a href="http://semver.org/spec/v2.0.0.html" rel="nofollow">Semantic Versioning 2.0.0</a> standard. That means the version of a release isnt arbitrary but rather describes how the library interfaces have changed. Read more about it at the <a href="http://semver.org/spec/v2.0.0.html" rel="nofollow">Semantic Versioning page</a>.</p> - - <h2>Related Projects</h2> - - <p>This particular repository only represents the core Java library. There are several other related repositories worth taking a look at.</p> - - <ul> - <li><a href="https://github.com/Syncleus/aparapi-examples" rel="nofollow">Aparapi Examples</a> - A collection of Java examples to showcase the Aparapi library and help developers get started.</li> - <li><a href="https://github.com/Syncleus/aparapi-jni" rel="nofollow">Aparapi JNI</a> - A java JAR which embeds and loads the native components at runtime. This prevents the need to seperately install the Aparapi Native library.</li> - <li><a href="https://github.com/Syncleus/aparapi-native" rel="nofollow">Aparapi Native</a> - The native library component. Without this the Java library can’t talk to the graphics card. This is not a java project but rather a C/C++ project.</li> - <li><a href="https://github.com/Syncleus/aparapi-archlinux" rel="nofollow">Aparapi Archlinux AUR</a> - An Archlinux AUR for installing the Aparapi JNI.</li> - <li><a href="https://github.com/Syncleus/aparapi-archlinux-repo" rel="nofollow">Aparapi Archlinux Repository</a> - A Archlinux binary repository containing all versions of the Aparapi JNI for easy installation.</li> - </ul> - - <h2>Prerequisites</h2> - - <p>Aparapi will run as-is on the CPU, however in order to access the GPU it requires OpenCL to be installed on the local system. If OpenCL isnt found then the library will just fallback to CPU mode. 
Aparapi supports, and has been tested on, both OpenCL 1.2 and OpenCL 2.0.</p> - - <p><strong>Aparapi runs on all operating systems and platforms, however GPU acceleration support is currently provided for the following platforms: Windows 64bit, Windows 32bit, Mac OSX 64bit, Linux 64bit, and Linux 32bit.</strong></p> - - <p>Note: It is no longer required to manually install the <a href="https://github.com/Syncleus/aparapi-native" rel="nofollow">Aparapi JNI native interface</a>, this is now done automatically through maven as a dependency on Aparapi.</p> - - <h2>Java Dependency</h2> - - <p>To include Aparapi in your project of choice include the following Maven dependency into your build.</p> - <pre class="highlight xml"><code> - <span class="nt"><dependency></span> - <span class="nt"><groupId></span>com.aparapi<span class="nt"></groupId></span> - <span class="nt"><artifactId></span>aparapi<span class="nt"></artifactId></span> - <span class="nt"><version></span>1.4.0<span class="nt"></version></span> - <span class="nt"></dependency></span> - </code></pre> - <h2>Obtaining the Source</h2> - - <p>The official source repository for Aparapi is located in the Syncleus Github repository and can be cloned using the - following command.</p> - <pre class="highlight shell"><code> - git clone https://github.com/Syncleus/aparapi.git - </code></pre> - <h2>Getting Started</h2> - - <p>With Aparapi we can take a sequential loop such as this (which adds each element from inA and inB arrays and puts the result in result).</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">inA</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float array of data from somewhere</span> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">inB</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a 
float array of data from somewhere</span> - <span class="k">assert</span> <span class="o">(</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span> <span class="o">==</span> <span class="n">inB</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o"><</span> <span class="n">array</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span class="o">{</span> - <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">+</span> <span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">}</span> - </code></pre> - <p>And refactor the sequential loop to the following form:</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">()</span> <span class="o">{</span> - <span class="nd">@Override</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span 
class="n">getGlobalId</span><span class="o">();</span> - <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">+</span> <span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">};</span> - - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Getting Started +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Getting Started</h1> +<div class='row center'> +<h4 class='header col s12 light center'>A framework for executing native Java code on the GPU.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<p><strong>Licensed under the Apache Software License v2</strong></p> + +<p>Aparapi allows developers to write native Java code capable of being executed directly on a graphics card GPU by converting Java byte code to an OpenCL kernel dynamically at runtime. Because it is backed by OpenCL, Aparapi is compatible with all OpenCL-compatible graphics cards.</p> + +<p>A GPU has a unique architecture that causes it to behave differently than a CPU. One of the most noticeable differences is that while a typical CPU has fewer than a dozen cores, a high-end GPU may have hundreds of cores. This makes GPUs uniquely suited for data-parallel computation that can result in speedups hundreds of times greater than what is possible with your average CPU. This can mean the difference between needing a whole data center to house your application versus just one or two computers, potentially saving millions in server costs.</p> + +<p>Aparapi was originally a project conceived and developed by AMD corporation. It was later abandoned by AMD and sat mostly idle for several years. Despite this, there were some failed efforts by the community to keep the project alive, but without a clear community leader no new releases ever came. Eventually we came along and rescued the project. Finally, after such a long wait, the first Aparapi release in 5 years was published, and the community continues to push forward with renewed excitement.</p> + +<p>Below you will find two side-by-side comparisons for the nbody problem on a CPU vs a GPU. The simulation is being run on an inexpensive graphics card; you can even run it yourself from the <a href="https://github.com/Syncleus/aparapi-examples" rel="nofollow">examples project</a>. 
The drastic performance gains that can be achieved with Aparapi are obvious.</p> + +<p><img src="http://aparapi.com/images/nbody_gpu.gif" alt="NBody GPU" /> <img src="http://aparapi.com/images/nbody_cpu.gif" alt="NBody CPU" /></p> + +<h2>Support and Documentation</h2> + +<p>Aparapi Javadocs: <a href="http://www.javadoc.io/doc/com.aparapi/aparapi" rel="nofollow">latest</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.4.1" rel="nofollow">1.4.1</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.4.0" rel="nofollow">1.4.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.4" rel="nofollow">1.3.4</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.3" rel="nofollow">1.3.3</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.2" rel="nofollow">1.3.2</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.1" rel="nofollow">1.3.1</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.3.0" rel="nofollow">1.3.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.2.0" rel="nofollow">1.2.0</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.2" rel="nofollow">1.1.2</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.1" rel="nofollow">1.1.1</a> - <a href="http://www.javadoc.io/doc/com.aparapi/aparapi/1.1.0" rel="nofollow">1.1.0</a> - <a href="http://www.javadoc.io/doc/com.syncleus.aparapi/aparapi/1.0.0" rel="nofollow">1.0.0</a></p> + +<p>For detailed documentation see <a href="http://Aparapi.com" rel="nofollow">Aparapi.com</a> or check out the <a href="http://www.javadoc.io/doc/com.aparapi/aparapi" rel="nofollow">latest Javadocs</a>.</p> + +<p>For support please use <a href="https://gitter.im/Syncleus/aparapi" rel="nofollow">Gitter</a> or the <a href="https://groups.google.com/d/forum/aparapi" rel="nofollow">official Aparapi mailing list</a>.</p> + +<p>Please file bugs and feature requests on <a href="https://github.com/Syncleus/aparapi/issues" 
rel="nofollow">Github</a>.</p> + +<p>Aparapi conforms to the <a href="http://semver.org/spec/v2.0.0.html" rel="nofollow">Semantic Versioning 2.0.0</a> standard. That means the version of a release isn’t arbitrary but rather describes how the library interfaces have changed. Read more about it at the <a href="http://semver.org/spec/v2.0.0.html" rel="nofollow">Semantic Versioning page</a>.</p> + +<h2>Related Projects</h2> + +<p>This particular repository only represents the core Java library. There are several other related repositories worth taking a look at.</p> + +<ul> +<li><a href="https://github.com/Syncleus/aparapi-examples" rel="nofollow">Aparapi Examples</a> - A collection of Java examples to showcase the Aparapi library and help developers get started.</li> +<li><a href="https://github.com/Syncleus/aparapi-jni" rel="nofollow">Aparapi JNI</a> - A Java JAR which embeds and loads the native components at runtime. This prevents the need to separately install the Aparapi Native library.</li> +<li><a href="https://github.com/Syncleus/aparapi-native" rel="nofollow">Aparapi Native</a> - The native library component. Without this the Java library can’t talk to the graphics card. This is not a Java project but rather a C/C++ project.</li> +<li><a href="https://github.com/Syncleus/aparapi-archlinux" rel="nofollow">Aparapi Archlinux AUR</a> - An Archlinux AUR for installing the Aparapi JNI.</li> +<li><a href="https://github.com/Syncleus/aparapi-archlinux-repo" rel="nofollow">Aparapi Archlinux Repository</a> - An Archlinux binary repository containing all versions of the Aparapi JNI for easy installation.</li> +</ul> + +<h2>Prerequisites</h2> + +<p>Aparapi will run as-is on the CPU, however in order to access the GPU it requires OpenCL to be installed on the local system. If OpenCL isn’t found then the library will just fall back to CPU mode. 
Aparapi supports, and has been tested on, both OpenCL 1.2 and OpenCL 2.0.</p> + +<p><strong>Aparapi runs on all operating systems and platforms, however GPU acceleration support is currently provided for the following platforms: Windows 64bit, Windows 32bit, Mac OSX 64bit, Linux 64bit, and Linux 32bit.</strong></p> + +<p>Note: It is no longer required to manually install the <a href="https://github.com/Syncleus/aparapi-native" rel="nofollow">Aparapi JNI native interface</a>, this is now done automatically through maven as a dependency on Aparapi.</p> + +<h2>Java Dependency</h2> + +<p>To include Aparapi in your project of choice include the following Maven dependency into your build.</p> +<div class="highlight"><pre class="highlight xml"><code> +<span class="nt"><dependency></span> + <span class="nt"><groupId></span>com.aparapi<span class="nt"></groupId></span> + <span class="nt"><artifactId></span>aparapi<span class="nt"></artifactId></span> + <span class="nt"><version></span>1.4.1<span class="nt"></version></span> +<span class="nt"></dependency></span> +</code></pre></div> +<h2>Obtaining the Source</h2> + +<p>The official source repository for Aparapi is located in the Syncleus Github repository and can be cloned using the +following command.</p> +<div class="highlight"><pre class="highlight shell"><code> +git clone https://github.com/Syncleus/aparapi.git +</code></pre></div> +<h2>Getting Started</h2> + +<p>With Aparapi we can take a sequential loop such as this (which adds each element from inA and inB arrays and puts the result in result).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">inA</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float array of data from somewhere</span> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">inB</span><span class="o">[]</span> <span 
class="o">=</span> <span class="o">....</span> <span class="c1">// get a float array of data from somewhere</span> +<span class="k">assert</span> <span class="o">(</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span> <span class="o">==</span> <span class="n">inB</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o"><</span> <span class="n">inA</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++)</span> <span class="o">{</span> + <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">inA</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">+</span> <span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> +<span class="o">}</span> +</code></pre></div> +<p>And refactor the sequential loop to the following form:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">()</span> <span class="o">{</span> + <span class="nd">@Override</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span 
class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> + <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">inA</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">+</span> <span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">};</span> + +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/javascripts/jquery.timeago.js b/javascripts/jquery.timeago.js index 855dfd8219e5e1859e718dada365ed2acc44809d..427ad7d5d517cbb0415d52a63f613a74a2aaefe6 100644 --- a/javascripts/jquery.timeago.js +++ b/javascripts/jquery.timeago.js @@ -3,7 +3,7 @@ * updating fuzzy timestamps (e.g. "4 minutes ago" or "about 1 day ago"). * * @name timeago - * @version 1.5.3 + * @version 1.5.4 * @requires jQuery v1.2.3+ * @author Ryan McGeary * @license MIT License - http://www.opensource.org/licenses/mit-license.php @@ -11,7 +11,7 @@ * For usage and examples, visit: * http://timeago.yarp.com/ * - * Copyright (c) 2008-2015, Ryan McGeary (ryan -[at]- mcgeary [*dot*] org) + * Copyright (c) 2008-2017, Ryan McGeary (ryan -[at]- mcgeary [*dot*] org) */ @@ -141,6 +141,7 @@ // functions are called with context of a single element var functions = { init: function() { + functions.dispose.call(this); var refresh_el = $.proxy(refresh, this); refresh_el(); var $s = $t.settings; @@ -151,7 +152,9 @@ update: function(timestamp) { var date = (timestamp instanceof Date) ? 
timestamp : $t.parse(timestamp); $(this).data('timeago', { datetime: date }); - if ($t.settings.localeTitle) $(this).attr("title", date.toLocaleString()); + if ($t.settings.localeTitle) { + $(this).attr("title", date.toLocaleString()); + } refresh.apply(this); }, updateFromDOM: function() { @@ -191,7 +194,7 @@ var data = prepareData(this); if (!isNaN(data.datetime)) { - if ( $s.cutoff == 0 || Math.abs(distance(data.datetime)) < $s.cutoff) { + if ( $s.cutoff === 0 || Math.abs(distance(data.datetime)) < $s.cutoff) { $(this).text(inWords(data.datetime)); } else { if ($(this).attr('title').length > 0) { diff --git a/proposals/address-space-with-buffers.html b/proposals/address-space-with-buffers.html index c0fc96d177b93994275c1d7c2bee6b483ffbc0b0..c3c7bf98ea9700a62b55e094aa699f2488b26c5e 100644 --- a/proposals/address-space-with-buffers.html +++ b/proposals/address-space-with-buffers.html @@ -1,160 +1,161 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Address Space with Buffers - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Address Space with Buffers</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Discussion of OpenCL address space support using java Buffers instead of arrays.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>The general idea is to have a AS<em>PRIMTYPE</em>Buffer for each AS=address space and PRIM=primitive type. 
Here is an example for LocalFloatBuffer which would be a buffer for floats that got mapped to OpenCL local address space.</p> - - <p>As with normal FloatBuffers, the float elements are accessed using get and put methods</p> - - <p>Although a LocalFloatBuffer conceptually exists only for the lifetime of a workgroup, it is still constructed in the enclosing Kernel, not in the Kernel.Entry.run method. (Aparapi does not support constructing new objects inside the Kernel.Entry.run method).</p> - - <p>A typical declaration would be:</p> - <pre class="highlight java"><code> - <span class="n">LocalFloatBuffer</span> <span class="n">locbuf</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LocalFloatBuffer</span><span class="o">{</span><span class="mi">12</span><span class="o">);</span> - </code></pre> - <p>The argument 12 here means that 12 floats would be used by each workitem in the workgroup. So the total buffer would be LocalSize*12 floats. Aparapi would at runtime allocate a total local OpenCL buffer to be this size. Note how this removes the need for the programmer to specify localSize anywhere.</p> - - <p>Note: For each Kernel.Entry.execute(globalSize) call, the runtime will determine an appropriate workgroup size, also called localSize, depending on the capabilities of the device, and on the globalSize. The localSize will always evenly divide the globalSize, in other words all workgroups for an execute context will be the same size. A workitem can determine localSize by calling getLocalSize().</p> - - <p>Because workitems operate simultaneously and in an undetermined order, workitems will generally only use put on its own portion of the LocalFloatBuffer between the LocalBarriers, and will generally only use get outside the LocalBarriers.</p> - - <p>Some example code (from NBody) follows. Here each workitem copies a “BODY” consisting of 4 floats. 
The global array contains 4*globalSize floats, and we want to iterate thru this global array, copying it into local memory and operating on it there. This will take globalSize/localSize “tiles”. For each tile, each workitem fills in one “BODY”’s worth or 4 elements</p> - <pre class="highlight java"><code> - <span class="c1">// outside run method...</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">BODYSIZE</span> <span class="o">=</span> <span class="mi">4</span><span class="o">;</span> - <span class="n">LocalFloatBuffer</span> <span class="n">pos_xyzm_local</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LocalFloatBuffer</span><span class="o">(</span><span class="n">BODYSIZE</span><span class="o">);</span> - <span class="c1">//</span> - <span class="c1">// inside run method...</span> - <span class="kt">int</span> <span class="n">numTiles</span> <span class="o">=</span> <span class="n">globalSize</span> <span class="o">/</span> <span class="n">localSize</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o"><</span> <span class="n">numTiles</span><span class="o">;</span> <span class="o">++</span><span class="n">i</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// load one tile into local memory</span> - <span class="kt">int</span> <span class="n">idx</span> <span class="o">=</span> <span class="n">i</span> <span class="o">*</span> <span class="n">localSize</span> <span class="o">+</span> <span class="n">localId</span><span class="o">;</span> <span class="c1">// index into a global memory array</span> - <span class="n">localBarrier</span><span class="o">();</span> - <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span 
class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">]);</span> - <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">]);</span> - <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">]);</span> - <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">3</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">3</span><span class="o">]);</span> - <span class="c1">// Synchronize to make sure data is available for processing</span> - <span class="n">localBarrier</span><span 
class="o">();</span> - - <span class="c1">// now the entire LocalFloatBuffer has been filled.</span> - <span class="c1">// each workitem might use the entire Buffer</span> - <span class="c1">// which consists of localSize BODYs</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">j</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">j</span> <span class="o"><</span> <span class="n">localSize</span><span class="o">;</span> <span class="o">++</span><span class="n">j</span><span class="o">)</span> <span class="o">{</span> - <span class="kt">float</span> <span class="n">r_x</span> <span class="o">=</span> <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_x</span><span class="o">;</span> - <span class="kt">float</span> <span class="n">r_y</span> <span class="o">=</span> <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_y</span><span class="o">;</span> - <span class="kt">float</span> <span class="n">r_z</span> <span class="o">=</span> <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_z</span><span class="o">;</span> - <span class="c1">// ...etc</span> - </code></pre> - </div> - </main> - 
<footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntAL
Wh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Address Space with Buffers +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Address Space with Buffers</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Discussion of OpenCL address space support using java Buffers instead of arrays.</h4> +</div> + 
+</div> + +</div> +<div class='container'> +<p>The general idea is to have an AS_PRIMTYPE_Buffer for each AS=address space and PRIM=primitive type. Here is an example for LocalFloatBuffer, which would be a buffer for floats that is mapped to the OpenCL local address space.</p> + +<p>As with normal FloatBuffers, the float elements are accessed using get and put methods.</p> + +<p>Although a LocalFloatBuffer conceptually exists only for the lifetime of a workgroup, it is still constructed in the enclosing Kernel, not in the Kernel.Entry.run method. (Aparapi does not support constructing new objects inside the Kernel.Entry.run method.)</p> + +<p>A typical declaration would be:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">LocalFloatBuffer</span> <span class="n">locbuf</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LocalFloatBuffer</span><span class="o">(</span><span class="mi">12</span><span class="o">);</span> +</code></pre></div> +<p>The argument 12 here means that 12 floats would be used by each workitem in the workgroup. So the total buffer would be localSize*12 floats. At runtime, Aparapi would allocate a local OpenCL buffer of this total size. Note how this removes the need for the programmer to specify localSize anywhere.</p> + +<p>Note: For each Kernel.Entry.execute(globalSize) call, the runtime will determine an appropriate workgroup size, also called localSize, depending on the capabilities of the device and on the globalSize. The localSize will always evenly divide the globalSize; in other words, all workgroups for an execute context will be the same size. 
A workitem can determine localSize by calling getLocalSize().</p> + +<p>Because workitems operate simultaneously and in an undetermined order, each workitem will generally only use put on its own portion of the LocalFloatBuffer between the LocalBarriers, and will generally only use get outside the LocalBarriers.</p> + +<p>Some example code (from NBody) follows. Here each workitem copies a “BODY” consisting of 4 floats. The global array contains 4*globalSize floats, and we want to iterate through this global array, copying it into local memory and operating on it there. This will take globalSize/localSize “tiles”. For each tile, each workitem fills in one “BODY”’s worth of 4 elements.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="c1">// outside run method...</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">BODYSIZE</span> <span class="o">=</span> <span class="mi">4</span><span class="o">;</span> +<span class="n">LocalFloatBuffer</span> <span class="n">pos_xyzm_local</span> <span class="o">=</span> <span class="k">new</span> <span class="n">LocalFloatBuffer</span><span class="o">(</span><span class="n">BODYSIZE</span><span class="o">);</span> +<span class="c1">//</span> +<span class="c1">// inside run method...</span> +<span class="kt">int</span> <span class="n">numTiles</span> <span class="o">=</span> <span class="n">globalSize</span> <span class="o">/</span> <span class="n">localSize</span><span class="o">;</span> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="n">numTiles</span><span class="o">;</span> <span class="o">++</span><span class="n">i</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// load one tile into local memory</span> + <span class="kt">int</span> <span class="n">idx</span> 
<span class="o">=</span> <span class="n">i</span> <span class="o">*</span> <span class="n">localSize</span> <span class="o">+</span> <span class="n">localId</span><span class="o">;</span> <span class="c1">// index into a global memory array</span> + <span class="n">localBarrier</span><span class="o">();</span> + <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">]);</span> + <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">]);</span> + <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">]);</span> + <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">localId</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span 
class="o">+</span> <span class="mi">3</span><span class="o">,</span> <span class="n">pos_xyzm</span><span class="o">[</span><span class="n">idx</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">3</span><span class="o">]);</span> + <span class="c1">// Synchronize to make sure data is available for processing</span> + <span class="n">localBarrier</span><span class="o">();</span> + + <span class="c1">// now the entire LocalFloatBuffer has been filled.</span> + <span class="c1">// each workitem might use the entire Buffer</span> + <span class="c1">// which consists of localSize BODYs</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">j</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">j</span> <span class="o">&lt;</span> <span class="n">localSize</span><span class="o">;</span> <span class="o">++</span><span class="n">j</span><span class="o">)</span> <span class="o">{</span> + <span class="kt">float</span> <span class="n">r_x</span> <span class="o">=</span> <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">0</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_x</span><span class="o">;</span> + <span class="kt">float</span> <span class="n">r_y</span> <span class="o">=</span> <span class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">1</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_y</span><span class="o">;</span> + <span class="kt">float</span> <span class="n">r_z</span> <span class="o">=</span> <span 
class="n">pos_xyzm_local</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">j</span> <span class="o">*</span> <span class="n">BODYSIZE</span> <span class="o">+</span> <span class="mi">2</span><span class="o">)</span> <span class="o">-</span> <span class="n">myPos_z</span><span class="o">;</span> + <span class="c1">// ...etc</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLj
ETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' 
class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/device.html b/proposals/device.html index d2224377aacd5cdaf018a902eea058ba8f3180a1..f198a872b98d58a27bd2a853b723844e236ac62d 100644 --- a/proposals/device.html +++ b/proposals/device.html @@ -1,183 +1,184 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Device - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Device</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How we might use the extension mechanism devices for general Kernel execution.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>At present the first GPU or CPU device (depending on Kernel.ExecutionMode value) is chosen at execution time. This make it easy to execute simple Kernels, but is problematic when using some advanced feature (barriers, local memory) or for sizing buffers appropriate for the target device. 
I propose that we add API’s to allow the developer to specify exactly which device we intend to target.</p> - - <p>In the extension proposal branch we needed to expose a Device class for binding arbitrary OpenCL to a Java interface. I suggest we also be use this to query device information useful for allocating suitable size global buffers/local buffers, and for dispatching Kernel’s to specific devices.</p> - - <p>The general pattern would be that we ask Aparapi to give us a Device, probably via a Device factory method.</p> - - <p>Something like:-</p> - <pre class="highlight java"><code> - <span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> - </code></pre> - <p>We would also offer other useful factory methods <code>getBestGPU()</code>, <code>getFirstCPU()</code>, <code>getJavaMultiThread()</code>, <code>getJavaSequential()</code> as well as a method to get all device so that the developer can filter themselves.</p> - - <p>Note that as well as real OpenCL devices we also expose ‘pseudo’ devices such as JavaMultiThread and Sequential. We might also allow pseudo devices to group multiple devices. 
So <code>getAllGPUDevices()</code> might return a pseudo device for executing across devices.</p> - <pre class="highlight plaintext"><code>Device chosen=null; - for (Device device: devices.getAll()){ - if (device.getVendor().contains("AMD") && device.isGPU()){ - chosen = device; - break; - } - } - </code></pre> - <p>A Device can be queried (<code>isGPU()</code>, <code>isOpenCL()</code>, <code>isGroup()</code>, <code>isJava()</code>, <code>getOpenCLPlatform()</code>, <code>getMaxMemory()</code>, <code>getLocalSizes()</code>) and may need to be cast to specific types.</p> - - <p>This would allow us to configure buffers.</p> - <pre class="highlight java"><code> - <span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(</span><span class="n">device</span> <span class="k">instanceof</span> <span class="n">OpenCLDevice</span><span class="o">){</span> - <span class="n">OpenCLDevice</span> <span class="n">openCLDevice</span> <span class="o">=</span> <span class="o">(</span><span class="n">OpenCLDevice</span><span class="o">)</span><span class="n">device</span><span class="o">;</span> - <span class="kt">char</span> <span class="n">input</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span><span class="o">[</span><span class="n">openCLDevice</span><span class="o">.</span><span class="na">getMaxMemory</span><span class="o">()/</span><span class="mi">4</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>We can also use the Device as a factory for creating Ranges.</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span 
class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> - </code></pre> - <p>This allows the Range to be created with knowledge of the underlying device. So for example <code>device.createRange3D(1024, 1024, 1024, 16, 16, 16)</code> will fail if the device does not allow a local size of (16x16x16).</p> - - <p>A range created using <code>device.createRangeXX()</code> would also capture the device that created it. As if we had</p> - <pre class="highlight java"><code> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> - <span class="c1">// implied range.setDevice(device);</span> - <span class="n">This</span> <span class="n">basically</span> <span class="n">means</span> <span class="n">that</span> <span class="n">the</span> <span class="n">Range</span> <span class="n">locks</span> <span class="n">the</span> <span class="n">device</span> <span class="n">that</span> <span class="n">it</span> <span class="n">can</span> <span class="n">be</span> <span class="n">used</span> <span class="n">with</span><span class="o">.</span> - - <span class="n">So</span> <span class="n">when</span> <span class="n">we</span> <span class="n">have</span> <span class="n">a</span> <span class="n">Kernel</span><span class="o">.</span> - - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="o">...</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>And we then use</p> - <pre class="highlight 
java"><code> - <span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">();</span> - <span class="kd">final</span> <span class="kt">char</span> <span class="n">input</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span><span class="o">[((</span><span class="n">OpenCLDevice</span><span class="o">)</span><span class="n">device</span><span class="o">).</span><span class="na">getMaxMemory</span><span class="o">()/</span><span class="mi">4</span><span class="o">);</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="c1">// uses input[];</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>We have forced execution on the first GPU. 
Java fallback would still be possible (should we forbid this?).</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">().</span><span class="na">getRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">));</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Device +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Device</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How we might use the extension mechanism devices for general Kernel execution.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<p>At present the first GPU or CPU device (depending on Kernel.ExecutionMode value) is chosen at execution time. This makes it easy to execute simple Kernels, but is problematic when using some advanced features (barriers, local memory) or for sizing buffers appropriate for the target device. I propose that we add APIs to allow the developer to specify exactly which device we intend to target.</p> + +<p>In the extension proposal branch we needed to expose a Device class for binding arbitrary OpenCL to a Java interface. I suggest we also use this to query device information useful for allocating suitable size global buffers/local buffers, and for dispatching Kernels to specific devices.</p> + +<p>The general pattern would be that we ask Aparapi to give us a Device, probably via a Device factory method.</p> + +<p>Something like:-</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> +</code></pre></div> +<p>We would also offer other useful factory methods <code>getBestGPU()</code>, <code>getFirstCPU()</code>, <code>getJavaMultiThread()</code>, <code>getJavaSequential()</code> as well as a method to get all devices so that the developer can filter them themselves.</p> + +<p>Note that as well as real OpenCL devices we also expose ‘pseudo’ devices such as JavaMultiThread and Sequential. We might also allow pseudo devices to group multiple devices. 
So <code>getAllGPUDevices()</code> might return a pseudo device for executing across devices.</p> +<div class="highlight"><pre class="highlight plaintext"><code>Device chosen=null; +for (Device device: devices.getAll()){ + if (device.getVendor().contains("AMD") && device.isGPU()){ + chosen = device; + break; + } +} +</code></pre></div> +<p>A Device can be queried (<code>isGPU()</code>, <code>isOpenCL()</code>, <code>isGroup()</code>, <code>isJava()</code>, <code>getOpenCLPlatform()</code>, <code>getMaxMemory()</code>, <code>getLocalSizes()</code>) and may need to be cast to specific types.</p> + +<p>This would allow us to configure buffers.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">best</span><span class="o">();</span> +<span class="k">if</span> <span class="o">(</span><span class="n">device</span> <span class="k">instanceof</span> <span class="n">OpenCLDevice</span><span class="o">){</span> + <span class="n">OpenCLDevice</span> <span class="n">openCLDevice</span> <span class="o">=</span> <span class="o">(</span><span class="n">OpenCLDevice</span><span class="o">)</span><span class="n">device</span><span class="o">;</span> + <span class="kt">char</span> <span class="n">input</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span><span class="o">[</span><span class="n">openCLDevice</span><span class="o">.</span><span class="na">getMaxMemory</span><span class="o">()/</span><span class="mi">4</span><span class="o">];</span> +<span class="o">}</span> +</code></pre></div> +<p>We can also use the Device as a factory for creating Ranges.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span 
class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> +</code></pre></div> +<p>This allows the Range to be created with knowledge of the underlying device. So for example <code>device.createRange3D(1024, 1024, 1024, 16, 16, 16)</code> will fail if the device does not allow a local size of (16x16x16).</p> + +<p>A range created using <code>device.createRangeXX()</code> would also capture the device that created it. As if we had</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">);</span> +<span class="c1">// implied range.setDevice(device);</span> +<span class="n">This</span> <span class="n">basically</span> <span class="n">means</span> <span class="n">that</span> <span class="n">the</span> <span class="n">Range</span> <span class="n">locks</span> <span class="n">the</span> <span class="n">device</span> <span class="n">that</span> <span class="n">it</span> <span class="n">can</span> <span class="n">be</span> <span class="n">used</span> <span class="n">with</span><span class="o">.</span> + +<span class="n">So</span> <span class="n">when</span> <span class="n">we</span> <span class="n">have</span> <span class="n">a</span> <span class="n">Kernel</span><span class="o">.</span> + +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="o">...</span> + <span class="o">}</span> +<span 
class="o">}</span> +</code></pre></div> +<p>And we then use</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span> <span class="n">device</span> <span class="o">=</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">();</span> +<span class="kd">final</span> <span class="kt">char</span> <span class="n">input</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">char</span><span class="o">[((</span><span class="n">OpenCLDevice</span><span class="o">)</span><span class="n">device</span><span class="o">).</span><span class="na">getMaxMemory</span><span class="o">()/</span><span class="mi">4</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="c1">// uses input[];</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">range</span> <span class="o">=</span> <span class="n">device</span><span class="o">.</span><span class="na">createRange2D</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1024</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>We have forced execution on the first GPU. 
Java fallback would still be possible (should we forbid this?).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span> <span class="n">Device</span><span class="o">.</span><span class="na">firstGPU</span><span class="o">().</span><span class="na">createRange2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">));</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full-time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN 
PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4t
JOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. 
+<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> +<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/extensions.html b/proposals/extensions.html index 0c7e47e3144429687044f52fdea5d8da8e495ae5..583b161c1f13a9a4e4776fda30984a21f09bdde4 100644 --- a/proposals/extensions.html +++ b/proposals/extensions.html @@ -1,374 +1,376 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> - <title> - Aparapi | Extensions - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent 
Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Extensions</h1> - <div class='row center'> - <h4 class='header col s12 light center'>A proposed aparapi extension mechanism.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>Here is a proposed Aparapi extension mechanism</h2> - - <p>This would allow a developer to create a library that could be used by Aparapi Kernel code. The library would include OpenCL and Java implementations.</p> - - <p>We will treat this as a live document. 
Please join the discussions at http://groups.google.com/group/aparapi-discuss/browse_thread/thread/7ec81ecb2169aa4 and I will update this page to reflect what I think the latest decisions are:-</p> - - <p>Currently Aparapi allows Java bytecode to be converted to OpenCL at runtime. Only the OpenCL generated by this conversion process is made available. Sometimes for performance reasons we might want to allow hand coded OpenCL to be called from Aparapi kernel code.</p> - - <p>Here we will present a strawman API which would allow extension points to be added by an end user or by a library provider.</p> - - <p>We will use an FFT usecase to walk through the steps.</p> - - <p>The FFT (Fast Fourier Transform) algorithm can be coded in Aparapi, but for performance reasons handcrafted OpenCL is likely to be more performant. The goal is to allow Aparapi to do what it does best, i.e. manage the host buffer allocations and provide a mechanism for binding arbitrary opencl code at runtime.</p> - - <p>So lets assume we wanted an Aparapi Kernel to be able to call an Aparapi extension for computing FFT (forward and reverse). 
The Kernel implementation might look like this.</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span class="n">FFT</span><span class="o">();</span> <span class="c1">// Create an instance of the Extension point.</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> - - <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">_real</span><span class="o">){</span> - <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> - <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - - <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>The main method then would just execute the Kernel using the familiar kernel.execute() method :-</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">static</span> <span 
class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> - <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span class="n">data</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Essentially we want the <code>FFT.forward(float[] _real, float[] _imaginary)</code> and <code>FFT.reverse(float[] _real, float[] _imaginary)</code> methods to be callable from Aparapi Kernel code. We want Aparapi to handle the call-forwarding and the argument/buffer mapping transfers. We want Aparapi to call the Java methods normally if OpenCL is not available but would like Aparapi to use the implementor provided OpenCL if it is. 
So the implementor will be required to provide both a Java and an OpenCL version of the callable methods because Aparapi will decide which version needs to be called ant runtime.</p> - - <p>Any extension point is required to implement the AparapiExtensionPoint interface.</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">AparapiExtensionPoint</span> - <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">();</span> - <span class="o">}</span> - </code></pre> - <p>Here is a possible (although incomplete) FFT implementation.</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">implements</span> <span class="n">AparapiExtensionPoint</span><span class="o">{</span> - <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - - <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span 
class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">()</span> <span class="o">{</span> - <span class="k">return</span> <span class="s">""</span> - <span class="o">+</span><span class="s">"void my_package_FFT_forward("</span> - <span class="o">+</span><span class="s">" __global float* _real,"</span> - <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> - <span class="o">+</span><span class="s">" {"</span> - <span class="o">+</span><span class="s">" // OpenCL implemention"</span> - <span class="o">+</span><span class="s">" }"</span> - <span class="o">+</span><span class="s">"void my_package_FFT_reverse("</span> - <span class="o">+</span><span class="s">" __global float* _real,"</span> - <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> - <span class="o">+</span><span class="s">" {"</span> - <span class="o">+</span><span class="s">" // OpenCL implemention"</span> - <span class="o">+</span><span class="s">" }"</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>The implementer’s class will be required to define the callable aparapi methods as well as implement the <code>getOpenCL()</code> method so that the OpenCL implementation of those methods can be extracted at run-time.</p> - - <p>Aparapi will provide annotations to decorate the methods and args/parameters of the exposed callable methods . These annotations provide information so that Aparapi locate the callable methods as well as parameter hints to help coordinate buffer types (global, local, constant) and transfer directions (read,write, readWrite) when executing the methods from a Kernel. 
This information is consulted during the normal bytecode analysis that Aparapi provides when Aparapi hits the call site.</p> - - <p>Note that the Java code inside the <code>@AparapiCallable</code> functions (or code executed from it) is not constrained to the normal Aparapi subset. It can be any legitimate Java code, but should be thread safe (because it will be called from JTP mode!).</p> - - <p>Note also that the OpenCL code yielded from the <code>getOpenCL()</code> method is assumed to be complete, Aparapi does not attempt to parse this code. If the code fails to compile Aparapi will fallback and execute the whole Kernel in JTP mode.</p> - - <p>BTW we show getOpenCL() returning a String literal. This is most likely to be how code is returned. However, it could be extracted from a File? a resource in the Jar file? or dynamically generated based on some state. For example an FFT implementation might choose to use different code for radix2 or radix4 implementations (based on a paramater passed to <code>FFT()</code> constructor - say <code>FFT(FFT.RADIX2))</code> in which case the getOpenCL() method might yield different code.</p> - - <p>The above proposal covers the case where a third party might want to provide an Aparapi extension point as a library.</p> - - <p>We might also consider allowing single methods within the Kernel to be optimized, where the OpenCL is made available via the AparapiCallable annotation. 
The method would still use the same Annotations for the args (to allow buffer txfers to be optimized).</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@AparapiCallable</span><span class="o">(</span><span class="err">â€</span> <span class="cm">/* opencl code for sum() goes here */</span><span class="err">â€</span><span class="o">)</span> - <span class="kt">int</span> <span class="nf">sum</span><span class="o">(</span><span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">data</span><span class="o">,</span> <span class="kt">int</span> <span class="n">length</span><span class="o">){</span> - <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="nl">v:</span><span class="n">data</span><span class="o">){</span> - <span class="n">sum</span><span class="o">+=</span><span class="n">v</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="n">sum</span><span class="o">(</span><span class="n">data</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Here are the proposed new interfaces/annotations</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">interface</span> <span class="nc">AparapiExtensionPoint</span> <span class="o">{</span> - <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">();</span> - <span 
class="o">}</span> - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">METHOD</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">AparapiCallable</span> <span class="o">{</span> - <span class="n">String</span> <span class="n">value</span> <span class="k">default</span> <span class="n">NULL</span><span class="o">;</span> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> +<title> +Aparapi | Extensions +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND 
Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Extensions</h1> +<div class='row center'> +<h4 class='header col s12 light center'>A proposed aparapi extension mechanism.</h4> +</div> + +</div> + +</div> +<div class='container'> +<h2>Here is a proposed Aparapi extension mechanism</h2> + +<p>This would allow a developer to create a library that could be used by Aparapi Kernel code. The library would include OpenCL and Java implementations.</p> + +<p>We will treat this as a live document. 
Please join the discussions at http://groups.google.com/group/aparapi-discuss/browse_thread/thread/7ec81ecb2169aa4 and I will update this page to reflect what I think the latest decisions are:</p> + +<p>Currently Aparapi allows Java bytecode to be converted to OpenCL at runtime. Only the OpenCL generated by this conversion process is made available. Sometimes, for performance reasons, we might want to allow hand-coded OpenCL to be called from Aparapi kernel code.</p> + +<p>Here we will present a strawman API which would allow extension points to be added by an end user or by a library provider.</p> + +<p>We will use an FFT use case to walk through the steps.</p> + +<p>The FFT (Fast Fourier Transform) algorithm can be coded in Aparapi, but handcrafted OpenCL is likely to be more performant. The goal is to allow Aparapi to do what it does best, i.e. manage the host buffer allocations and provide a mechanism for binding arbitrary OpenCL code at runtime.</p> + +<p>So let's assume we want an Aparapi Kernel to be able to call an Aparapi extension for computing FFT (forward and reverse).
The Kernel implementation might look like this.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span class="n">FFT</span><span class="o">();</span> <span class="c1">// Create an instance of the Extension point.</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> + + <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">_real</span><span class="o">){</span> + <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> + <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + + <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>The main method then would just execute the Kernel using the familiar kernel.execute() method :-</p> +<div class="highlight"><pre class="highlight java"><code> +<span 
class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> + <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span class="n">data</span><span class="o">);</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Essentially we want the <code>FFT.forward(float[] _real, float[] _imaginary)</code> and <code>FFT.reverse(float[] _real, float[] _imaginary)</code> methods to be callable from Aparapi Kernel code. We want Aparapi to handle the call-forwarding and the argument/buffer mapping transfers. We want Aparapi to call the Java methods normally if OpenCL is not available but would like Aparapi to use the implementor provided OpenCL if it is. 
So the implementor will be required to provide both a Java and an OpenCL version of the callable methods because Aparapi will decide which version needs to be called at runtime.</p> + +<p>Any extension point is required to implement the AparapiExtensionPoint interface.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">interface</span> <span class="nc">AparapiExtensionPoint</span> <span class="o">{</span> + <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">();</span> +<span class="o">}</span> +</code></pre></div> +<p>Here is a possible (although incomplete) FFT implementation.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">implements</span> <span class="n">AparapiExtensionPoint</span><span class="o">{</span> + <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> + <span class="o">}</span> + + <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span
class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> <span class="o">}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Global</span> <span class="o">{}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Local</span> <span class="o">{}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Constant</span> <span class="o">{}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span 
class="nd">@interface</span> <span class="n">ReadWrite</span> <span class="o">{}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">ReadOnly</span> <span class="o">{}</span> - - <span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> - <span class="kd">public</span> <span class="nd">@interface</span> <span class="n">WriteOnly</span> <span class="o">{}</span> - </code></pre> - <p>And here is the example code in one chunk</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">implements</span> <span class="n">AparapiExtensionPoint</span><span class="o">{</span> - <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - - <span 
class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">()</span> <span class="o">{</span> - <span class="k">return</span> <span class="s">""</span> - <span class="o">+</span><span class="s">"void my_package_FFT_forward("</span> - <span class="o">+</span><span class="s">" __global float* _real,"</span> - <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> - <span class="o">+</span><span class="s">" {"</span> - <span class="o">+</span><span class="s">" // OpenCL implemention"</span> - <span class="o">+</span><span class="s">" }"</span> - <span class="o">+</span><span class="s">"void my_package_FFT_reverse("</span> - <span class="o">+</span><span class="s">" __global float* _real,"</span> - <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> - <span class="o">+</span><span class="s">" {"</span> - <span class="o">+</span><span class="s">" // OpenCL implemention"</span> - <span class="o">+</span><span class="s">" }"</span><span class="o">;</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">()</span> <span class="o">{</span> + <span class="k">return</span> <span class="s">""</span> + <span class="o">+</span><span 
class="s">"void my_package_FFT_forward("</span> + <span class="o">+</span><span class="s">" __global float* _real,"</span> + <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> + <span class="o">+</span><span class="s">" {"</span> + <span class="o">+</span><span class="s">" // OpenCL implemention"</span> + <span class="o">+</span><span class="s">" }"</span> + <span class="o">+</span><span class="s">"void my_package_FFT_reverse("</span> + <span class="o">+</span><span class="s">" __global float* _real,"</span> + <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> + <span class="o">+</span><span class="s">" {"</span> + <span class="o">+</span><span class="s">" // OpenCL implemention"</span> + <span class="o">+</span><span class="s">" }"</span><span class="o">;</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>The implementer’s class will be required to define the callable aparapi methods as well as implement the <code>getOpenCL()</code> method so that the OpenCL implementation of those methods can be extracted at run-time.</p> + +<p>Aparapi will provide annotations to decorate the methods and args/parameters of the exposed callable methods. These annotations provide information so that Aparapi can locate the callable methods as well as parameter hints to help coordinate buffer types (global, local, constant) and transfer directions (read, write, readWrite) when executing the methods from a Kernel. This information is consulted during the normal bytecode analysis that Aparapi provides when Aparapi hits the call site.</p> + +<p>Note that the Java code inside the <code>@AparapiCallable</code> functions (or code executed from them) is not constrained to the normal Aparapi subset.
It can be any legitimate Java code, but should be thread safe (because it will be called from JTP mode!).</p> + +<p>Note also that the OpenCL code yielded from the <code>getOpenCL()</code> method is assumed to be complete; Aparapi does not attempt to parse this code. If the code fails to compile, Aparapi will fall back and execute the whole Kernel in JTP mode.</p> + +<p>BTW we show getOpenCL() returning a String literal. This is most likely to be how code is returned. However, it could be extracted from a file, loaded from a resource in the Jar file, or dynamically generated based on some state. For example an FFT implementation might choose to use different code for radix2 or radix4 implementations (based on a parameter passed to the <code>FFT()</code> constructor - say <code>FFT(FFT.RADIX2)</code>), in which case the getOpenCL() method might yield different code.</p> + +<p>The above proposal covers the case where a third party might want to provide an Aparapi extension point as a library.</p> + +<p>We might also consider allowing single methods within the Kernel to be optimized, where the OpenCL is made available via the AparapiCallable annotation.
The method would still use the same annotations for the args (to allow buffer transfers to be optimized).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">k</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@AparapiCallable</span><span class="o">(</span><span class="s">" /* opencl code for sum() goes here */"</span><span class="o">)</span> + <span class="kt">int</span> <span class="nf">sum</span><span class="o">(</span><span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">data</span><span class="o">,</span> <span class="kt">int</span> <span class="n">length</span><span class="o">){</span> + <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="nl">v:</span><span class="n">data</span><span class="o">){</span> + <span class="n">sum</span><span class="o">+=</span><span class="n">v</span><span class="o">;</span> <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span class="n">FFT</span><span class="o">();</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> - - <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span
class="o">[]</span> <span class="n">_real</span><span class="o">){</span> - <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> - <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - - <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> - <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span class="n">data</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>After discussion I think we are converging on a less complex 
solution. This is based on Witold’s feedback suggestion (see below) where we use OpenCL annotations rather than forcing the implementation of the interface and the <code>getOpenCL()</code> method as originally suggested.</p> - - <p>So we will create an <code>@OpenCL</code> annotation for classes/methods.</p> - - <p>The <code>@OpenCL</code> annotation on the methods will contain the OpenCL source replacement for a specific method. The arg list will be created by Aparapi.</p> - - <p>The @OpenCL annotation on a class allows us to optionally introduce common code (helper methods, #pragmas, constants) which will precede the method declarations in the OpenCL code.</p> - - <p>So an FFT example whereby forward() and reverse() methods both called a common foo() method might look like this.</p> - <pre class="highlight java"><code> - <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">common</span><span class="o">=</span><span class="s">"/* common void foo(){} + maybe #pragmas + accessable - global fields declared here */"</span><span class="o">)</span> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">extends</span> <span class="n">AparapiExtensionPoint</span> <span class="o">{</span> - <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">signature</span><span class="o">=</span><span class="s">"//function signature - OPTIONAL"</span><span class="o">,</span> <span class="n">body</span><span class="o">=</span><span class="s">"{ /* uses foo(); */ }"</span><span class="o">)</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span 
class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">function</span><span class="o">=</span><span class="s">"{ /*uses foo(); */) }"</span><span class="o">)</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> - <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> - <span class="c1">// java implementation</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>To invoke from an Aparapi kernel. 
We should be able to do something like</p> - <pre class="highlight java"><code> - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span class="n">FFT</span><span class="o">();</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> - - <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">_real</span><span class="o">){</span> - <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> - <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - - <span class="o">}</span> - - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> - <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="k">this</span><span class="o">,</span> <span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> - <span 
class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> - <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span class="n">data</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="o">}</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="n">sum</span><span class="o">(</span><span class="n">data</span><span class="o">);</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Here are the proposed new interfaces/annotations</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">interface</span> <span class="nc">AparapiExtensionPoint</span> <span class="o">{</span> + <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">();</span> +<span class="o">}</span> +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">METHOD</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">AparapiCallable</span> <span class="o">{</span> + <span class="n">String</span> <span class="n">value</span> <span 
class="k">default</span> <span class="n">NULL</span><span class="o">;</span> +<span class="o">}</span> + +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Global</span> <span class="o">{}</span> + +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Local</span> <span class="o">{}</span> + +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">Constant</span> <span class="o">{}</span> + +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">ReadWrite</span> <span class="o">{}</span> 
+ +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">ReadOnly</span> <span class="o">{}</span> + +<span class="nd">@Retention</span><span class="o">(</span><span class="n">RetentionPolicy</span><span class="o">.</span><span class="na">RUNTIME</span><span class="o">)</span> <span class="nd">@Target</span><span class="o">(</span><span class="n">ElementType</span><span class="o">.</span><span class="na">PARAMETER</span><span class="o">)</span> +<span class="kd">public</span> <span class="nd">@interface</span> <span class="n">WriteOnly</span> <span class="o">{}</span> +</code></pre></div> +<p>And here is the example code in one chunk</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">implements</span> <span class="n">AparapiExtensionPoint</span><span class="o">{</span> + <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> + <span class="o">}</span> + + <span class="nd">@AparapiCallable</span> <span class="kd">public</span> <span class="kt">void</span> 
<span class="nf">reverse</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> + <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">String</span> <span class="nf">getOpenCL</span><span class="o">()</span> <span class="o">{</span> + <span class="k">return</span> <span class="s">""</span> + <span class="o">+</span><span class="s">"void my_package_FFT_forward("</span> + <span class="o">+</span><span class="s">" __global float* _real,"</span> + <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> + <span class="o">+</span><span class="s">" {"</span> + <span class="o">+</span><span class="s">" // OpenCL implemention"</span> + <span class="o">+</span><span class="s">" }"</span> + <span class="o">+</span><span class="s">"void my_package_FFT_reverse("</span> + <span class="o">+</span><span class="s">" __global float* _real,"</span> + <span class="o">+</span><span class="s">" __global float* _imaginary )"</span> + <span class="o">+</span><span class="s">" {"</span> + <span class="o">+</span><span class="s">" // OpenCL implemention"</span> + <span class="o">+</span><span class="s">" }"</span><span class="o">;</span> + <span class="o">}</span> +<span class="o">}</span> + +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span 
class="n">FFT</span><span class="o">();</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> + + <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">_real</span><span class="o">){</span> + <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> + <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + + <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> + <span class="o">}</span> +<span class="o">}</span> + +<span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> + <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span 
class="n">data</span><span class="o">);</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>After discussion I think we are converging on a less complex solution. This is based on Witold’s feedback suggestion (see below) where we use OpenCL annotations rather than forcing the implementation of the interface and the <code>getOpenCL()</code> method as originally suggested.</p> + +<p>So we will create an <code>@OpenCL</code> annotation for classes/methods.</p> + +<p>The <code>@OpenCL</code> annotation on the methods will contain the OpenCL source replacement for a specific method. The arg list will be created by Aparapi.</p> + +<p>The @OpenCL annotation on a class allows us to optionally introduce common code (helper methods, #pragmas, constants) which will precede the method declarations in the OpenCL code.</p> + +<p>So an FFT example whereby forward() and reverse() methods both called a common foo() method might look like this.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="nd">@OpenCL</span><span class="o">(</span><span class="n">common</span><span class="o">=</span><span class="s">"/* common void foo(){} + maybe #pragmas + accessable +global fields declared here */"</span><span class="o">)</span> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">FFT</span> <span class="kd">extends</span> <span class="n">AparapiExtensionPoint</span> <span class="o">{</span> + <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">signature</span><span class="o">=</span><span class="s">"//function signature - OPTIONAL"</span><span class="o">,</span> <span class="n">body</span><span class="o">=</span><span class="s">"{ /* uses foo(); */ }"</span><span class="o">)</span> + <span 
class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> + <span class="o">}</span> + <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">function</span><span class="o">=</span><span class="s">"{ /* uses foo(); */ }"</span><span class="o">)</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> + <span class="nd">@Global</span> <span class="nd">@ReadWrite</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">)</span> <span class="o">{</span> + <span class="c1">// java implementation</span> <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Ideally we would also like to invoke FFT directly (instead of via a Kernel). 
This is tricky because the forward()} and reverse() methods will need to be invoked across a range and of course the dispatch across the range needs to be initiated from Aparapi.</p> - - <p>The only way I can see how to do this is to force the creation of an interface so we can use Java’s existing Proxy mechanism to create a wrapper.</p> - <pre class="highlight java"><code> - <span class="nd">@OpenCL</span><span class="o">(</span><span class="n">wraps</span><span class="o">=</span><span class="n">FFT</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> - <span class="kd">interface</span> <span class="nc">FFTInterface</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">);</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">);</span> - <span class="o">}</span> - <span class="n">Then</span> <span class="n">provide</span> <span class="n">a</span> <span class="n">mechanism</span> <span class="k">for</span> <span class="n">extracting</span> <span class="n">a</span> <span class="n">proxy</span> <span class="n">and</span> <span class="n">invoking</span> <span class="n">it</span><span class="o">.</span> - - <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span> <span 
class="o">=</span> <span class="c1">//??</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">imag</span> <span class="o">=</span> <span class="c1">//??</span> - <span class="n">Aparapi</span><span class="o">.</span><span class="na">wrap</span><span class="o"><</span><span class="n">FFT</span><span class="o">>(</span><span class="n">FFTInterface</span><span class="o">.</span><span class="na">class</span><span class="o">).</span><span class="na">forward</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">real</span><span class="o">,</span> <span class="n">imag</span><span class="o">);</span> - </code></pre> - <p>I can’t see a cleaner solution.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>To invoke from an Aparapi kernel. We should be able to do something like</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">BandStopFilter</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="n">FFT</span> <span class="n">fft</span> <span class="o">=</span> <span class="k">new</span> <span class="n">FFT</span><span class="o">();</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">real</span><span class="o">;</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">imaginary</span><span class="o">;</span> + + <span class="n">BandStopFilter</span> <span class="o">(</span><span class="kt">float</span><span class="o">[]</span> <span class="n">_real</span><span class="o">){</span> + <span class="n">real</span> <span class="o">=</span> <span class="n">_real</span><span class="o">;</span> + <span class="n">imaginary</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">_real</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + + <span class="o">}</span> + + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">()</span> <span class="o">{</span> + <span class="n">fft</span><span class="o">.</span><span class="na">forward</span><span class="o">(</span><span class="k">this</span><span class="o">,</span> <span class="n">real</span><span class="o">,</span> <span class="n">imaginary</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">}</span> + + <span 
class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> + <span class="kt">float</span><span class="o">[]</span> <span class="n">data</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="mi">1024</span><span class="o">];</span> + <span class="n">BandStopFilter</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">BandStopFilter</span> <span class="o">(</span><span class="n">data</span><span class="o">);</span> + <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">data</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Ideally we would also like to invoke FFT directly (instead of via a Kernel). 
This is tricky because the forward() and reverse() methods will need to be invoked across a range and of course the dispatch across the range needs to be initiated from Aparapi.</p> + +<p>The only way I can see how to do this is to force the creation of an interface so we can use Java’s existing Proxy mechanism to create a wrapper.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="nd">@OpenCL</span><span class="o">(</span><span class="n">wraps</span><span class="o">=</span><span class="n">FFT</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> +<span class="kd">interface</span> <span class="nc">FFTInterface</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">forward</span><span class="o">(</span> <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">);</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">reverse</span><span class="o">(</span> <span class="n">Range</span> <span class="n">_range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_data</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">_imaginary</span><span class="o">);</span> +<span class="o">}</span> +<span class="n">Then</span> <span class="n">provide</span> <span class="n">a</span> <span class="n">mechanism</span> <span class="k">for</span> <span class="n">extracting</span> <span class="n">a</span> <span class="n">proxy</span> <span class="n">and</span> <span class="n">invoking</span> <span class="n">it</span><span class="o">.</span> + +<span class="kt">float</span><span class="o">[]</span> <span 
class="n">real</span> <span class="o">=</span> <span class="c1">//??</span> +<span class="kt">float</span><span class="o">[]</span> <span class="n">imag</span> <span class="o">=</span> <span class="c1">//??</span> +<span class="n">Aparapi</span><span class="o">.</span><span class="na">wrap</span><span class="o"><</span><span class="n">FFT</span><span class="o">>(</span><span class="n">FFTInterface</span><span class="o">.</span><span class="na">class</span><span class="o">).</span><span class="na">forward</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">real</span><span class="o">,</span> <span class="n">imag</span><span class="o">);</span> +</code></pre></div> +<p>I can’t see a cleaner solution.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/lambda-syntax.html b/proposals/lambda-syntax.html index e5415ece24269ac508432d2ba9d00eb8d19ff0f5..60105ae97bc3522863fc6f4a98a4ccc57fc982dc 100644 --- a/proposals/lambda-syntax.html +++ b/proposals/lambda-syntax.html @@ -1,227 +1,229 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Lambda Syntax - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' 
id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Lambda Syntax</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Syntax suggestions for HSA enabled Aparapi.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h1>Introduction</h1> - - <p>Now that Java 8 is nearly upon us and HSA enabled Aparapi ‘lambda’ branch is usable (though in no way complete) I figured we could use this page to discuss the 'programming model’ we might prefer for Aparapi, and contrast with the API’s for the new Java 8 lambda based stream APIs.</p> - - <h2>Converting between Aparapi HSA + Java 8 enabled Aparapi</h2> - - <p>Our <strong>hello world</strong> app has always been the “vector add”. In classic Aparapi we could transform</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">inA</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float array from somewhere</span> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">inB</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float from somewhere</span> - <span class="c1">// assume (inA.length==inB.length)</span> - <span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span 
class="n">array</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> - <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">}</span> - </code></pre> - <p>to</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">i</span><span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> - <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">};</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>For the lambda aparapi branch we can currently use</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span 
class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">]);</span> - </code></pre> - <p>Note that the closest Java 8 construct is</p> - <pre class="highlight java"><code> - <span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">]);</span> - </code></pre> - <p>Aparapi and Java 8 stream API’s both use IntConsumer as the lambda type. 
So you can reuse the lambda.</p> - <pre class="highlight java"><code> - <span class="n">IntConsumer</span> <span class="n">lambda</span> <span class="o">=</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - - <span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> - <span class="n">Device</span><span class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">lambda</span><span class="o">);</span> - </code></pre> - <p>Exposing the Deviceness of this was a conscious effort. 
We may also hide it completely.</p> - <pre class="highlight java"><code> - <span class="n">IntConsumer</span> <span class="n">lambda</span> <span class="o">=</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - - <span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> - <span class="n">Aparapi</span><span class="o">.</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">lambda</span><span class="o">);</span> - </code></pre> - <p>I am toying with providing an API which maps more closely to the Stream API from Java 8.</p> - - <p>Maybe</p> - <pre class="highlight java"><code> - <span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> - <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span 
class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> - </code></pre> - <p>This way users can more readily swap between the two.</p> - - <p>For collections/arrays in Aparapi we can also offer</p> - <pre class="highlight java"><code> - <span class="n">T</span><span class="o">[]</span> <span class="n">arr</span> <span class="o">=</span> <span class="c1">// get an array of T from somewhere</span> - <span class="n">ArrayList</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">list</span> <span class="o">=</span> <span class="c1">// get an array backed list of T from somewhere</span> - - <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">arr</span><span class="o">).</span><span class="na">forEach</span><span class="o">(</span><span class="n">t</span> <span class="o">-></span> <span class="cm">/* do something with each T */</span><span class="o">);</span> - </code></pre> - <p>We can create special cases. 
Say for mutating images</p> - <pre class="highlight java"><code> - <span class="n">BufferedImage</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">;</span> - <span class="n">Aparapi</span><span class="o">.</span><span class="na">forEachPixel</span><span class="o">(</span><span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">,</span> <span class="n">rgb</span><span class="o">[]</span> <span class="o">-></span> <span class="n">rgb</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">=</span> <span class="mi">0</span> <span class="o">);</span> - </code></pre> - <p>We may also need select operations for associative operations</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Person</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">age</span><span class="o">;</span> - <span class="n">String</span> <span class="n">first</span><span class="o">;</span> - <span class="n">String</span> <span class="n">last</span><span class="o">;</span> - <span class="o">};</span> - - <span class="n">Aparapi</span><span class="o">.</span><span class="na">selectOne</span><span class="o">(</span><span class="n">Person</span><span class="o">[]</span> <span class="n">people</span><span class="o">,</span> <span class="o">(</span><span class="n">p1</span><span class="o">,</span><span class="n">p2</span><span class="o">)-></span> <span class="n">p1</span><span class="o">.</span><span class="na">age</span><span class="o">></span><span class="n">p2</span><span class="o">.</span><span class="na">age</span><span class="o">?</span><span class="nl">p1:</span><span class="n">p2</span> <span class="o">);</span> - </code></pre> - <h2>A case for map reduce</h2> - - <p>A mapper maps from one type to another. Possibly by extracting state. 
Here is a mapper which maps each String in an array of Strings to its length.</p> - - <p>As if the mapper was</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">mapToInt</span><span class="o"><</span><span class="n">T</span><span class="o">>{</span> <span class="kt">int</span> <span class="nf">map</span><span class="o">(</span><span class="n">T</span> <span class="n">v</span><span class="o">);</span> <span class="o">}</span> - </code></pre> - <p>Here it is in action.</p> - <pre class="highlight java"><code> - <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">())...</span> - </code></pre> - <p>Now the result is a stream of int’s which can be 'reduced’ by a reduction lambda.</p> - - <p>In this case the reduction reduces two int’s to one, by choosing the max of k and v. 
All reductions must be commutative style operations (max, min, add) where the order of execution is not important.</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">lengthOfLongestString</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">reduce</span><span class="o">((</span><span class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">></span><span class="n">v</span><span class="o">?</span><span class="nl">k:</span><span class="n">v</span><span class="o">);</span> - </code></pre> - <p>Here we had a sum reduction.</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">sumOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">reduce</span><span class="o">((</span><span class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">+</span><span class="n">v</span><span class="o">);</span> - </code></pre> - <p>Some of these may be common enough that we offer direct functionality.</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">sumOfLengths</span> <span class="o">=</span> <span 
class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">sum</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">maxOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">max</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">minOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">min</span><span class="o">();</span> - <span class="n">String</span> <span class="n">string</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">select</span><span class="o">((</span><span 
class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">></span><span class="n">v</span><span class="o">);</span> - </code></pre> - <p>This last one needs some explaining. We map String to int then select the String whose length is the greatest.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkq
hkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Lambda Syntax +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Lambda Syntax</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Syntax suggestions for HSA enabled Aparapi.</h4> +</div> + +</div> + +</div> +<div class='container'> 
+<h1>Introduction</h1> + +<p>Now that Java 8 is nearly upon us and the HSA-enabled Aparapi ‘lambda’ branch is usable (though in no way complete), I figured we could use this page to discuss the ‘programming model’ we might prefer for Aparapi, and contrast it with the new Java 8 lambda-based stream APIs.</p> + +<h2>Converting between Aparapi HSA + Java 8 enabled Aparapi</h2> + +<p>Our <strong>hello world</strong> app has always been the “vector add”. In classic Aparapi we could transform</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">inA</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float array from somewhere</span> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">inB</span><span class="o">[]</span> <span class="o">=</span> <span class="o">....</span> <span class="c1">// get a float from somewhere</span> + <span class="c1">// assume (inA.length==inB.length)</span> +<span class="kd">final</span> <span class="kt">float</span> <span class="n">result</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">float</span><span class="o">[</span><span class="n">inA</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">i</span><span class="o"><</span><span class="n">array</span><span class="o">.</span><span class="na">length</span><span class="o">;</span> <span class="n">i</span><span class="o">++){</span> + <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span 
class="o">[</span><span class="n">i</span><span class="o">];</span> +<span class="o">}</span> +</code></pre></div> +<p>to</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">i</span><span class="o">=</span> <span class="n">getGlobalId</span><span class="o">();</span> + <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">};</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>For the lambda aparapi branch we can currently use</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span 
class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">]);</span> +</code></pre></div> +<p>Note that the closest Java 8 construct is</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">]);</span> +</code></pre></div> +<p>Aparapi and Java 8 stream APIs both use IntConsumer as the lambda type. 
So you can reuse the lambda.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">IntConsumer</span> <span class="n">lambda</span> <span class="o">=</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + +<span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> +<span class="n">Device</span><span class="o">.</span><span class="na">hsa</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">lambda</span><span class="o">);</span> +</code></pre></div> +<p>Exposing the Deviceness of this was a conscious effort. 
We may also hide it completely.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">IntConsumer</span> <span class="n">lambda</span> <span class="o">=</span> <span class="n">i</span><span class="o">-></span> <span class="n">result</span><span class="o">[</span><span class="n">i</span><span class="o">]=</span><span class="n">intA</span><span class="o">[</span><span class="n">i</span><span class="o">]+</span><span class="n">inB</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + +<span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> +<span class="n">Aparapi</span><span class="o">.</span><span class="na">forEach</span><span class="o">(</span><span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="n">lambda</span><span class="o">);</span> +</code></pre></div> +<p>I am toying with providing an API which maps more closely to the Stream API from Java 8.</p> + +<p>Maybe</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">IntStream</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> +<span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span 
class="mi">0</span><span class="o">,</span> <span class="n">result</span><span class="o">.</span><span class="na">length</span><span class="o">).</span><span class="na">parallel</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">lambda</span><span class="o">);</span> +</code></pre></div> +<p>This way users can more readily swap between the two.</p> + +<p>For collections/arrays in Aparapi we can also offer</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">T</span><span class="o">[]</span> <span class="n">arr</span> <span class="o">=</span> <span class="c1">// get an array of T from somewhere</span> +<span class="n">ArrayList</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">list</span> <span class="o">=</span> <span class="c1">// get an array backed list of T from somewhere</span> + +<span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">arr</span><span class="o">).</span><span class="na">forEach</span><span class="o">(</span><span class="n">t</span> <span class="o">-></span> <span class="cm">/* do something with each T */</span><span class="o">);</span> +</code></pre></div> +<p>We can create special cases. 
Say for mutating images</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">BufferedImage</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">;</span> +<span class="n">Aparapi</span><span class="o">.</span><span class="na">forEachPixel</span><span class="o">(</span><span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">,</span> <span class="n">rgb</span><span class="o">[]</span> <span class="o">-></span> <span class="n">rgb</span><span class="o">[</span><span class="mi">0</span><span class="o">]</span> <span class="o">=</span> <span class="mi">0</span> <span class="o">);</span> +</code></pre></div> +<p>We may also need select operations for associative operations</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Person</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">age</span><span class="o">;</span> + <span class="n">String</span> <span class="n">first</span><span class="o">;</span> + <span class="n">String</span> <span class="n">last</span><span class="o">;</span> +<span class="o">};</span> + +<span class="n">Aparapi</span><span class="o">.</span><span class="na">selectOne</span><span class="o">(</span><span class="n">Person</span><span class="o">[]</span> <span class="n">people</span><span class="o">,</span> <span class="o">(</span><span class="n">p1</span><span class="o">,</span><span class="n">p2</span><span class="o">)-></span> <span class="n">p1</span><span class="o">.</span><span class="na">age</span><span class="o">></span><span class="n">p2</span><span class="o">.</span><span class="na">age</span><span class="o">?</span><span class="nl">p1:</span><span class="n">p2</span> <span class="o">);</span> +</code></pre></div> +<h2>A case for map reduce</h2> + +<p>A mapper maps from one type to another. Possibly by extracting state. 
Here is a mapper which maps each String in an array of Strings to its length.</p> + +<p>It is as if the mapper were declared as</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">mapToInt</span><span class="o"><</span><span class="n">T</span><span class="o">>{</span> <span class="kt">int</span> <span class="nf">map</span><span class="o">(</span><span class="n">T</span> <span class="n">v</span><span class="o">);</span> <span class="o">}</span> +</code></pre></div> +<p>Here it is in action.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">())...</span> +</code></pre></div> +<p>Now the result is a stream of ints which can be ‘reduced’ by a reduction lambda.</p> + +<p>In this case the reduction reduces two ints to one, by choosing the max of k and v. 
All reductions must be commutative-style operations (max, min, add) where the order of execution is not important.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">lengthOfLongestString</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">reduce</span><span class="o">((</span><span class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">></span><span class="n">v</span><span class="o">?</span><span class="nl">k:</span><span class="n">v</span><span class="o">);</span> +</code></pre></div> +<p>Here we have a sum reduction.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">sumOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">reduce</span><span class="o">((</span><span class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">+</span><span class="n">v</span><span class="o">);</span> +</code></pre></div> +<p>Some of these may be common enough that we offer direct functionality.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> 
<span class="n">sumOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">sum</span><span class="o">();</span> +<span class="kt">int</span> <span class="n">maxOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">max</span><span class="o">();</span> +<span class="kt">int</span> <span class="n">minOfLengths</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span> <span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span class="na">min</span><span class="o">();</span> +<span class="n">String</span> <span class="n">string</span> <span class="o">=</span> <span class="n">Aparapi</span><span class="o">.</span><span class="na">range</span><span class="o">(</span><span class="n">strings</span><span class="o">).</span><span class="na">map</span><span class="o">(</span><span class="n">s</span><span class="o">-></span><span class="n">string</span><span class="o">.</span><span class="na">length</span><span class="o">()).</span><span 
class="na">select</span><span class="o">((</span><span class="n">k</span><span class="o">,</span><span class="n">v</span><span class="o">)-></span> <span class="n">k</span><span class="o">></span><span class="n">v</span><span class="o">);</span> +</code></pre></div> +<p>This last one needs some explaining. We map String to int then select the String whose length is the greatest.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECx
QKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/lambdas.html b/proposals/lambdas.html index c69c447de17f9e01f001e97a928e67ea7d4a5805..5a5413c6aee41e6c3f8ef2ba88c12711fa3e589f 100644 --- a/proposals/lambdas.html +++ b/proposals/lambdas.html @@ -1,232 +1,233 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Lambdas - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object 
data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple 
Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' 
id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Lambdas</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Proposals for Java 8 Lambda Support to Aparapi.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>In the recently added “lambda” branch we have been experimenting with adding lambda support to Aparapi. We believe that this upcomming Java 8 feature will be a natural way to express parallel algorithms which can be executed on the GPU.</p> - - <p>A link to the branch can be found here preview.</p> - - <p>You will need to get the latest binary build of “Project Lambda” to experiment with these new features.</p> - - <p>Once you have a Lambda enabled Java 8 JDK Java set JAVA_HOME to your Java8 Lambda enabled compiler and build Aparapi.</p> - - <p>So from the root of SumatraExperiments just use</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span>mvn - </code></pre> - <p>We are slowly walking through some of the Aparapi demos and converting them. At present NBody and Mandel have been converted.</p> - - <p>With Lambda enabled Aparapi we remove the need to derive from a base Kernel class, we will allow the user to express their code as a lambda using the following basic pattern</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="n">IntConsumer</span> <span class="n">lambda</span><span class="o">);</span> - </code></pre> - <p>The Java 8 stream API defines a type called java.util.function.IntConsumer. 
This is essentially an interface with a Single Abstract Method (these types are referred to as SAM types in the stream API code).</p> - - <p>IntConsumer looks something like….</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">IntConsumer</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">accept</span><span class="o">(</span><span class="kt">int</span> <span class="n">Id</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>So you can run the familiar ‘squares’ kernel using</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> - <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="o">.../</span> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="o">(</span><span class="n">i</span><span class="o">)->{</span> - <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">});</span> - </code></pre> - <p>Instead of</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> - <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span 
class="o">=</span> <span class="o">.../</span> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="k">new</span> <span class="n">IntConsumer</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">accept</span><span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">){</span> - <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">});</span> - </code></pre> - <p>To accomodate lambda’s we created Device.forEach(int range, IntConsumer ic) which converts the bytecode of the ic parameter to OpenCL at runtime. The captured args (in, out and i - in this case) are passed to the GPU and the kernel executed.</p> - - <p>During our early experiments we encountered an interesting issue. The new 'lambdafied’ javac uses Java 7 method handles and invoke dynamic instructions to dispatch the lambda code. It does this by injecting a call to a MethodHandle factory into the call site. At runtime, this factory creates a synthetic class (to capture call-site args) and passes this to our Device.forEach().</p> - - <p>We needed to analyse this synthetically generated class in order to work out which args need to be sent to the GPU. 
Of course we have a bunch of tools already in Aparapi for analyzing bytecode, but this code expects to find bytecode in class files (either in a Jar or on the disk), we had to find a way to access these classfile bytes to Aparapi.</p> - - <p>We have a couple of proposed solutions for solving this. The most promising is to turn the aparapi.dll/aparapi.so native library (used by Aparapi at runtime) into a JVMTI agent (like hprof). JVMTI agents are native libraries which have access to some aspects of a running JVM (via the JVM Tool Interface). We havea prototype JVMTI agent which 'listens’ for classfiles which represent these 'synthetic lambda helpers’ and allows us to get hold of the bytecode for these classes.</p> - - <p>This will mean that in future we will change how Aparapi is launched.</p> - - <p>Instead of</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span>java -Djava.library.path<span class="o">=</span>path/to/aparapi -classpath path/to/aparapi/aparapi.jar:your.jar YourClass - </code></pre> - <p>We will use</p> - <pre class="highlight shell"><code> - <span class="gp">$ </span>java -agentlib<span class="o">=</span>path/to/aparapi/aparapi.dll -classpath path/to/aparapi/aparapi.jar:your.jar YourClass - </code></pre> - <p>We are also looking into the possibility of having this agent provide the bytecode for all Aparapi classes. We believe that this will enable us to ultimately remove MethodModel/ClassModel and even the InstructionSet classes and handling all of this in JNI.</p> - - <p>We would welcome comments on these proposals. Either here, or in the discussion list. Let us know what you think.</p> - - <h2>Consequences of lambdification of Aparapi.</h2> - - <ul> - <li>No support for local memory, group size or barriers in Lambda form</li> - <li>Calls to Kernel base class methods (such as getGlobalId()) will not be allowed. 
The 'global id’ will be passed as an arg to the lambda.</li> - <li>We will need to add support for calling static methods (of course the bytecode for the called methods cannot violate Aparapi restrictions).</li> - <li>We might need to drop support for multi dimension dispatch. This is more a convergence story with Sumatra (which is unlikely to support this)</li> - <li>Unlikely that explicit buffer management will be simple.</li> - <li>We can use lambda’s for control as well as the kernel itself. See examples below.</li> - </ul> - - <h2>Alternate forms for kernel dispatch</h2> - - <p>This version would allow us to carry over Aparapi’s device selection</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">i</span><span class="o">->{</span><span class="n">lambda</span><span class="o">});</span> - </code></pre> - <p>This version would allow us to carry over Aparapi’s Range selection</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">range2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">).</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">rid</span><span class="o">->{</span><span class="n">lambda</span><span class="o">});</span> - </code></pre> - <p>This version would allow us to mimic Kernel.execute(1024, 5)</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span 
class="mi">5</span><span class="o">,</span> <span class="o">(</span><span class="n">id</span><span class="o">,</span> <span class="n">passid</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> - </code></pre> - <p>We could even have the range iterated over until some other lambda determines we are done</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEachUntil</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">id</span><span class="o">->{</span><span class="n">lambda</span><span class="o">},</span> <span class="o">->{</span><span class="n">predicate</span> <span class="n">lambda</span><span class="o">});</span> - </code></pre> - <p>Explicit buffer handling could be removed in many cases by allowing the bytecode of the 'until’ predicate to be snooped for buffer references.</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">lotsOfData</span><span class="o">[]</span> <span class="o">=</span> <span class="o">...;</span> - <span class="kt">boolean</span> <span class="n">found</span><span class="o">[</span><span class="kc">false</span><span class="o">]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">boolean</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEachUntil</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">5</span><span class="o">,</span> - <span class="o">(</span><span class="n">id</span><span class="o">,</span> <span class="n">passid</span><span class="o">)->{</span> <span class="cm">/* mutate lotsOfData, found[0]=true when done */</span> <span class="o">}</span> - <span 
class="o">->{</span><span class="n">found</span><span class="o">[</span><span class="mi">0</span><span class="o">]]});</span> - </code></pre> - <p>In the above cases Aparapi can determine that between each pass it needs to “ONLY” copy found[] back from the device.</p> - - <p>There is no reason that the range itself needs to be constant, we can use a collection/iterable. This helps with some reductions.</p> - <pre class="highlight java"><code> - <span class="kt">int</span> <span class="n">range</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1024</span><span class="o">,</span><span class="mi">512</span><span class="o">,</span><span class="mi">128</span><span class="o">,</span><span class="mi">64</span><span class="o">,</span><span class="mi">32</span><span class="o">,</span><span class="mi">16</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">1</span><span class="o">,</span><span class="mi">0</span><span class="o">};</span> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">range</span><span class="o">,{</span><span class="n">lambda</span><span class="o">});</span> - </code></pre> - <p>or the range can be a lambda itself, here we specify a start and end value for the range itself, and a lambda to provide each step.</p> - <pre class="highlight java"><code> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1</span><span class="o">,</span> <span class="n">r</span><span class="o">->{</span><span 
class="k">return</span><span class="o">(</span><span class="n">r</span><span class="o">/</span><span class="mi">2</span><span class="o">);},(</span><span class="n">pass</span><span class="o">,</span> <span class="n">r</span><span class="o">,</span> <span class="n">id</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> - <span class="c1">// or</span> - <span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="n">r</span><span class="o">->{</span><span class="k">return</span><span class="o">(</span><span class="n">r</span><span class="o">*</span><span class="mi">2</span><span class="o">);},(</span><span class="n">pass</span><span class="o">,</span> <span class="n">r</span><span class="o">,</span> <span class="n">id</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Lambdas +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Lambdas</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Proposals for Java 8 Lambda Support to Aparapi.</h4> +</div> + +</div> + +</div> +<div class='container'> +<p>In the 
recently added “lambda” branch we have been experimenting with adding lambda support to Aparapi. We believe that this upcoming Java 8 feature will be a natural way to express parallel algorithms which can be executed on the GPU.</p> + +<p>A link to the branch can be found here preview.</p> + +<p>You will need to get the latest binary build of “Project Lambda” to experiment with these new features.</p> + +<p>Once you have a Lambda enabled Java 8 JDK, set JAVA_HOME to your Java 8 Lambda enabled compiler and build Aparapi.</p> + +<p>So from the root of SumatraExperiments just use</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span>mvn +</code></pre></div> +<p>We are slowly walking through some of the Aparapi demos and converting them. At present NBody and Mandel have been converted.</p> + +<p>With Lambda enabled Aparapi we remove the need to derive from a base Kernel class, we will allow the user to express their code as a lambda using the following basic pattern</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="n">IntConsumer</span> <span class="n">lambda</span><span class="o">);</span> +</code></pre></div> +<p>The Java 8 stream API defines a type called java.util.function.IntConsumer. 
This is essentially an interface with a Single Abstract Method (these types are referred to as SAM types in the stream API code).</p> + +<p>IntConsumer looks something like….</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">IntConsumer</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">accept</span><span class="o">(</span><span class="kt">int</span> <span class="n">Id</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>So you can run the familiar ‘squares’ kernel using</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> +<span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="o">.../</span> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="o">(</span><span class="n">i</span><span class="o">)->{</span> + <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + <span class="o">});</span> +</code></pre></div> +<p>Instead of</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="o">..</span><span class="c1">//</span> +<span class="kt">int</span> <span 
class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="o">.../</span> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">,</span> <span class="k">new</span> <span class="n">IntConsumer</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">accept</span><span class="o">(</span><span class="kt">int</span> <span class="n">i</span><span class="o">){</span> + <span class="n">out</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">i</span><span class="o">];</span> + <span class="o">}</span> + <span class="o">});</span> +</code></pre></div> +<p>To accommodate lambdas we created Device.forEach(int range, IntConsumer ic) which converts the bytecode of the ic parameter to OpenCL at runtime. The captured args (in, out and i - in this case) are passed to the GPU and the kernel is executed.</p> + +<p>During our early experiments we encountered an interesting issue. The new ‘lambdafied’ javac uses Java 7 method handles and invokedynamic instructions to dispatch the lambda code. It does this by injecting a call to a MethodHandle factory into the call site. At runtime, this factory creates a synthetic class (to capture call-site args) and passes this to our Device.forEach().</p> + +<p>We needed to analyse this synthetically generated class in order to work out which args need to be sent to the GPU. 
Of course we have a bunch of tools already in Aparapi for analyzing bytecode, but this code expects to find bytecode in class files (either in a Jar or on the disk), so we had to find a way to make these classfile bytes accessible to Aparapi.</p> + +<p>We have a couple of proposed solutions for solving this. The most promising is to turn the aparapi.dll/aparapi.so native library (used by Aparapi at runtime) into a JVMTI agent (like hprof). JVMTI agents are native libraries which have access to some aspects of a running JVM (via the JVM Tool Interface). We have a prototype JVMTI agent which ‘listens’ for classfiles which represent these ‘synthetic lambda helpers’ and allows us to get hold of the bytecode for these classes.</p> + +<p>This will mean that in future we will change how Aparapi is launched.</p> + +<p>Instead of</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span>java <span class="nt">-Djava</span>.library.path<span class="o">=</span>path/to/aparapi <span class="nt">-classpath</span> path/to/aparapi/aparapi.jar:your.jar YourClass +</code></pre></div> +<p>We will use</p> +<div class="highlight"><pre class="highlight shell"><code> +<span class="nv">$ </span>java <span class="nt">-agentlib</span><span class="o">=</span>path/to/aparapi/aparapi.dll <span class="nt">-classpath</span> path/to/aparapi/aparapi.jar:your.jar YourClass +</code></pre></div> +<p>We are also looking into the possibility of having this agent provide the bytecode for all Aparapi classes. We believe that this will enable us to ultimately remove MethodModel/ClassModel and even the InstructionSet classes and handle all of this in JNI.</p> + +<p>We would welcome comments on these proposals. Either here, or in the discussion list. 
Let us know what you think.</p> + +<h2>Consequences of lambdification of Aparapi.</h2> + +<ul> +<li>No support for local memory, group size or barriers in Lambda form</li> +<li>Calls to Kernel base class methods (such as getGlobalId()) will not be allowed. The ‘global id’ will be passed as an arg to the lambda.</li> +<li>We will need to add support for calling static methods (of course the bytecode for the called methods cannot violate Aparapi restrictions).</li> +<li>We might need to drop support for multi-dimension dispatch. This is more a convergence story with Sumatra (which is unlikely to support this).</li> +<li>It is unlikely that explicit buffer management will be simple.</li> +<li>We can use lambdas for control as well as the kernel itself. See examples below.</li> +</ul> + +<h2>Alternate forms for kernel dispatch</h2> + +<p>This version would allow us to carry over Aparapi’s device selection</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">i</span><span class="o">->{</span><span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +<p>This version would allow us to carry over Aparapi’s Range selection</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">range2D</span><span class="o">(</span><span class="n">width</span><span class="o">,</span> <span class="n">height</span><span class="o">).</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">rid</span><span class="o">->{</span><span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +<p>This version would allow us to 
mimic Kernel.execute(1024, 5)</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">5</span><span class="o">,</span> <span class="o">(</span><span class="n">id</span><span class="o">,</span> <span class="n">passid</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +<p>We could even have the range iterated over until some other lambda determines we are done</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEachUntil</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="n">id</span><span class="o">->{</span><span class="n">lambda</span><span class="o">},</span> <span class="o">->{</span><span class="n">predicate</span> <span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +<p>Explicit buffer handling could be removed in many cases by allowing the bytecode of the ‘until’ predicate to be snooped for buffer references.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">lotsOfData</span><span class="o">[]</span> <span class="o">=</span> <span class="o">...;</span> +<span class="kt">boolean</span> <span class="n">found</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">boolean</span><span class="o">[</span><span class="mi">1</span><span class="o">];</span> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEachUntil</span><span 
class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">5</span><span class="o">,</span> + <span class="o">(</span><span class="n">id</span><span class="o">,</span> <span class="n">passid</span><span class="o">)->{</span> <span class="cm">/* mutate lotsOfData, found[0]=true when done */</span> <span class="o">},</span> + <span class="o">->{</span><span class="n">found</span><span class="o">[</span><span class="mi">0</span><span class="o">]});</span> +</code></pre></div> +<p>In the above case Aparapi can determine that between each pass it needs to “ONLY” copy found[] back from the device.</p> + +<p>There is no reason that the range itself needs to be constant; we can use a collection/iterable. This helps with some reductions.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kt">int</span> <span class="n">range</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]{</span><span class="mi">1024</span><span class="o">,</span><span class="mi">512</span><span class="o">,</span><span class="mi">128</span><span class="o">,</span><span class="mi">64</span><span class="o">,</span><span class="mi">32</span><span class="o">,</span><span class="mi">16</span><span class="o">,</span><span class="mi">8</span><span class="o">,</span><span class="mi">4</span><span class="o">,</span><span class="mi">2</span><span class="o">,</span><span class="mi">1</span><span class="o">,</span><span class="mi">0</span><span class="o">};</span> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="n">range</span><span class="o">,{</span><span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +<p>Or the range can be a lambda itself. Here we specify a start and end value for the range itself, and a lambda to provide each 
step.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">1</span><span class="o">,</span> <span class="n">r</span><span class="o">->{</span><span class="k">return</span><span class="o">(</span><span class="n">r</span><span class="o">/</span><span class="mi">2</span><span class="o">);},(</span><span class="n">pass</span><span class="o">,</span> <span class="n">r</span><span class="o">,</span> <span class="n">id</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> +<span class="c1">// or</span> +<span class="n">Device</span><span class="o">.</span><span class="na">bestGPU</span><span class="o">().</span><span class="na">forEach</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span> <span class="mi">1024</span><span class="o">,</span> <span class="n">r</span><span class="o">->{</span><span class="k">return</span><span class="o">(</span><span class="n">r</span><span class="o">*</span><span class="mi">2</span><span class="o">);},(</span><span class="n">pass</span><span class="o">,</span> <span class="n">r</span><span class="o">,</span> <span class="n">id</span><span class="o">)->{</span><span class="n">lambda</span><span class="o">});</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/multiple-dim-nd-range.html b/proposals/multiple-dim-nd-range.html index c75308356cc7d0f5af1228427f67c4f608b6cbb6..54f87512b9462f6aeb95c77182bf12e0b4640023 100644 --- a/proposals/multiple-dim-nd-range.html +++ b/proposals/multiple-dim-nd-range.html @@ -1,378 +1,380 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Multiple Dim ND Range - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li 
class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Multiple Dim ND Range</h1> - <div class='row center'> - <h4 class='header col s12 light center'>A proposal for accessing multi-dim ND range execution.</h4> - </div> - - </div> - - </div> - <div class='container'> - <p>We can discuss this proposal either here (in comments) or via the discussion list here.</p> - - <p>Note this is nothing to do with accessing Java 2D arrays in Aparapi. This discussion is focused on the ability to expose the execution of kernels over 1, 2 or 3 dimensions. The memory in each case is a single contiguous region (like a single dimension primitive array).</p> - - <p>At present an Aparapi kernel can only be executed using a single dimension. If we wish to represent execution over WIDTH x HEIGHT element grid we would execute over the range (WIDTH*HEIGHT) and manually divide/mod getGlobalID() by WIDTH to determine the x and y for each.</p> - - <p>Similarly we would multiply y by WIDTH and add x (y*WIDTH+x) to convert an X,Y location to a linear global id</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="kd">final</span> <span 
class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobaId</span><span class="o">()%</span><span class="n">WIDTH</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalID</span><span class="o">()/</span><span class="n">WIDTH</span><span class="o">;</span> - <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span class="o">==(</span><span class="n">WIDTH</span><span class="o">-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">HEIGHT</span><span class="o">-</span><span class="mi">1</span><span class="o">)){</span> - <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span 
class="o">++){</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> - <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">WIDTH</span><span class="o">+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">WIDTH</span><span class="o">+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> - <span class="c1">// or out[getGlobalID()] = sum/9;</span> - <span class="o">}</span> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' 
name='description'> +<title> +Aparapi | Multiple Dim ND Range +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Multiple Dim ND Range</h1> +<div class='row center'> +<h4 class='header col s12 light center'>A proposal for accessing multi-dim ND range execution.</h4> +</div> + +</div> + +</div> +<div class='container'> +<p>We can discuss this proposal either here (in comments) or via the discussion list here.</p> + +<p>Note this is nothing to do with accessing Java 2D arrays in Aparapi. This discussion is focused on the ability to expose the execution of kernels over 1, 2 or 3 dimensions. 
The memory in each case is a single contiguous region (like a single dimension primitive array).</p> + +<p>At present an Aparapi kernel can only be executed using a single dimension. If we wish to represent execution over a WIDTH x HEIGHT element grid we would execute over the range (WIDTH*HEIGHT) and manually divide/mod getGlobalID() by WIDTH to determine the x and y for each.</p> + +<p>Similarly we would multiply y by WIDTH and add x (y*WIDTH+x) to convert an X,Y location to a linear global id.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalId</span><span 
class="o">()%</span><span class="n">WIDTH</span><span class="o">;</span> + <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalID</span><span class="o">()/</span><span class="n">WIDTH</span><span class="o">;</span> + <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span class="o">==(</span><span class="n">WIDTH</span><span class="o">-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">HEIGHT</span><span class="o">-</span><span class="mi">1</span><span class="o">)){</span> + <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> + <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">WIDTH</span><span class="o">+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span 
class="o">)];</span> <span class="o">}</span> - - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">);</span> - </code></pre> - <p>OpenCL natively allows the user to execute over 1, 2 or 3 dimension grids via the clEnqueueNDRangeKernel() method.</p> - - <p>We chose not to expose this in Aparapi but there have been requests for us to allow it.</p> - - <p>There are a number of things to consider here:</p> - - <ol> - <li>Extending the syntax of kernel.execute() to allow multi dimensional grids.</li> - <li>Mapping Kernel methods to OpenCL’s get<em>local</em>id(int dim), get<em>local</em>size(int dim), get<em>group</em>id(int<em>dim), etc. At present we map kernel.getGlobalId() to get</em>local_id(0).</li> - <li>Handling all of these when an application drops back to JTP mode.</li> - </ol> - - <h2>Extending Kernel.execute(int range)</h2> - - <p>Sadly we can’t overload Kernel.execute(int range), Kernel.execute(int xrange, int yrange) and Kernel.execute(int xrange, int yrange, int zrange) because we already have kernel.execute(int, int) mapped for executing mutiple passes over the linear range.</p> - - <p>Remember</p> - <pre class="highlight java"><code> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o"><</span><span class="mi">20</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> - <span class="n">kernel</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Is equivalent to</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">(</span><span class="mi">1024</span><span 
class="o">,</span> <span class="mi">20</span><span class="o">);</span> - </code></pre> - <p>I think I would prefer</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZ</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZ</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> 
<span class="n">passes</span><span class="o">)</span> - </code></pre> - <p>Obviously in the above calls we are only supplying the global bounds for the grid. We could also provide mappings allowing local ranges. I think I would prefer</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">local</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">local</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span 
class="na">executeXYZLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zlocalrange</span><span class="o">)</span> - <span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> - </code></pre> - <p>Another alternative may be to create Range classes</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">passes</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">width</span><span class="o">;</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">);</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span 
class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kd">class</span> <span class="nc">Range2D</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">height</span><span class="o">;</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">);</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> - - <span class="o">}</span> - - <span class="kd">class</span> <span class="nc">Range3D</span> <span class="kd">extends</span> <span class="n">Range2D</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">depth</span><span class="o">;</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">);</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>With 
appropriate constructors (or factory methods) to allow</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">)</span> - </code></pre> - <p>Then execute would be simply.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span><span class="mi">1</span><span class="o">))</span> - </code></pre> - <p>We can also arrange for the group size to be placed in the base Range class.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">groupSize</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">passes</span><span class="o">;</span> - <span class="kt">int</span> <span class="n">width</span><span class="o">;</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">);</span> - <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <h2>Mapping to OpenCL multi dim methods. 
i.e get<em>global</em>id(1), get<em>local</em>size(2) etc</h2> - - <p>We could just add getGlobalId(int dim), getLocalSize(int dim) etc to replicate OpenCL methods.</p> - - <p>I would prefer to offer the following global mappings</p> - - <table><thead> - <tr> - <th>Kernel</th> - <th>OpenCL</th> - </tr> - </thead><tbody> - <tr> - <td>getGlobalId()</td> - <td>get<em>global</em>id(0)</td> - </tr> - <tr> - <td>getGlobalX()</td> - <td>get<em>global</em>id(0)</td> - </tr> - <tr> - <td>getGlobalY()</td> - <td>get<em>global</em>id(1)</td> - </tr> - <tr> - <td>getGlobalZ()</td> - <td>get<em>global</em>id(2)</td> - </tr> - <tr> - <td>getGlobalSize()</td> - <td>get<em>global</em>size(0)</td> - </tr> - <tr> - <td>getGlobalWidth()</td> - <td>get<em>global</em>size(0)</td> - </tr> - <tr> - <td>getGlobalHeight()</td> - <td>get<em>global</em>size(1)</td> - </tr> - <tr> - <td>getGlobalDepth()</td> - <td>get<em>global</em>size(2)</td> - </tr> - </tbody></table> - - <p>And the following local mappings</p> - - <table><thead> - <tr> - <th>Kernel</th> - <th>OpenCL</th> - </tr> - </thead><tbody> - <tr> - <td>getLocalId()</td> - <td>get<em>local</em>id(0)</td> - </tr> - <tr> - <td>getLocalX()</td> - <td>get<em>local</em>id(0)</td> - </tr> - <tr> - <td>getLocalY()</td> - <td>get<em>local</em>id(1)</td> - </tr> - <tr> - <td>getLocalZ()</td> - <td>get<em>local</em>id(2)</td> - </tr> - <tr> - <td>getLocalSize()</td> - <td>get<em>local</em>size(0)</td> - </tr> - <tr> - <td>getLocalWidth()</td> - <td>get<em>local</em>size(0)</td> - </tr> - <tr> - <td>getLocalHeight()</td> - <td>get<em>local</em>size(1)</td> - </tr> - <tr> - <td>getLocalDepth()</td> - <td>get<em>local</em>size(2)</td> - </tr> - </tbody></table> - - <h2>An example</h2> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> - <span class="kd">final</span> 
<span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalX</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalY</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span class="o">==(</span><span class="n">getGlobalWidth</span><span class="o">()-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">getGlobalHeight</span><span class="o">()-</span><span class="mi">1</span><span class="o">)){</span> - <span 
class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> - <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalWidth</span><span class="o">()+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalWidth</span><span class="o">()+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> - <span class="c1">// or out[getGlobalID()] = sum/9;</span> - <span class="o">}</span> + <span class="o">}</span> + <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">WIDTH</span><span class="o">+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> + <span 
class="c1">// or out[getGlobalID()] = sum/9;</span> + <span class="o">}</span> + <span class="o">}</span> + +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">);</span> +</code></pre></div> +<p>OpenCL natively allows the user to execute over 1-, 2- or 3-dimensional grids via the clEnqueueNDRangeKernel() method.</p> + +<p>We chose not to expose this in Aparapi but there have been requests for us to allow it.</p> + +<p>There are a number of things to consider here:</p> + +<ol> +<li>Extending the syntax of kernel.execute() to allow multi-dimensional grids.</li> +<li>Mapping Kernel methods to OpenCL’s get_local_id(int dim), get_local_size(int dim), get_group_id(int dim), etc. At present we map kernel.getGlobalId() to get_local_id(0).</li> +<li>Handling all of these when an application drops back to JTP mode.</li> +</ol> + +<h2>Extending Kernel.execute(int range)</h2> + +<p>Sadly we can’t overload Kernel.execute(int range), Kernel.execute(int xrange, int yrange) and Kernel.execute(int xrange, int yrange, int zrange) because we already have kernel.execute(int, int) mapped for executing multiple passes over the linear range.</p> + +<p>Remember</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">pass</span><span class="o">=</span><span class="mi">0</span><span class="o">;</span> <span class="n">pass</span><span class="o">&lt;</span><span class="mi">20</span><span class="o">;</span> <span class="n">pass</span><span class="o">++){</span> + <span class="n">kernel</span><span class="o">(</span><span class="mi">1024</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Is equivalent to</p> +<div class="highlight"><pre class="highlight 
java"><code> +<span class="n">kernel</span><span class="o">(</span><span class="mi">1024</span><span class="o">,</span> <span class="mi">20</span><span class="o">);</span> +</code></pre></div> +<p>I think I would prefer</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZ</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZ</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span 
class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +</code></pre></div> +<p>Obviously in the above calls we are only supplying the global bounds for the grid. We could also provide mappings allowing local ranges. I think I would prefer</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">local</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span> <span class="n">local</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span 
class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zlocalrange</span><span class="o">)</span> +<span class="n">Kernel</span><span class="o">.</span><span class="na">executeXYZLocal</span><span class="o">(</span><span class="kt">int</span> <span class="n">xrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">yrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">xlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">ylocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">zlocalrange</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">)</span> +</code></pre></div> +<p>Another alternative may be to create Range classes</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">passes</span><span class="o">;</span> + <span class="kt">int</span> <span class="n">width</span><span class="o">;</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">);</span> + 
<span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> +<span class="o">}</span> + +<span class="kd">class</span> <span class="nc">Range2D</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">height</span><span class="o">;</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">);</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> + +<span class="o">}</span> + +<span class="kd">class</span> <span class="nc">Range3D</span> <span class="kd">extends</span> <span class="n">Range2D</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">depth</span><span class="o">;</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span class="o">);</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">height</span><span 
class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>With appropriate constructors (or factory methods) to allow</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">)</span> +</code></pre></div> +<p>Then execute would be simply.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="mi">1</span><span class="o">,</span><span class="mi">1</span><span class="o">))</span> +</code></pre></div> +<p>We can also arrange for the group size to be placed in the base Range class.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">groupSize</span><span class="o">;</span> + <span class="kt">int</span> <span class="n">passes</span><span class="o">;</span> + <span class="kt">int</span> <span class="n">width</span><span class="o">;</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">);</span> + <span class="kd">static</span> <span class="n">Range</span> <span class="nf">create</span><span class="o">(</span><span class="kt">int</span> <span class="n">width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">passes</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<h2>Mapping to OpenCL multi dim methods. 
i.e. get_global_id(1), get_local_size(2) etc.</h2> + +<p>We could just add getGlobalId(int dim), getLocalSize(int dim) etc. to replicate OpenCL methods.</p> + +<p>I would prefer to offer the following global mappings</p> + +<table><thead> +<tr> +<th>Kernel</th> +<th>OpenCL</th> +</tr> +</thead><tbody> +<tr> +<td>getGlobalId()</td> +<td>get_global_id(0)</td> +</tr> +<tr> +<td>getGlobalX()</td> +<td>get_global_id(0)</td> +</tr> +<tr> +<td>getGlobalY()</td> +<td>get_global_id(1)</td> +</tr> +<tr> +<td>getGlobalZ()</td> +<td>get_global_id(2)</td> +</tr> +<tr> +<td>getGlobalSize()</td> +<td>get_global_size(0)</td> +</tr> +<tr> +<td>getGlobalWidth()</td> +<td>get_global_size(0)</td> +</tr> +<tr> +<td>getGlobalHeight()</td> +<td>get_global_size(1)</td> +</tr> +<tr> +<td>getGlobalDepth()</td> +<td>get_global_size(2)</td> +</tr> +</tbody></table> + +<p>And the following local mappings</p> + +<table><thead> +<tr> +<th>Kernel</th> +<th>OpenCL</th> +</tr> +</thead><tbody> +<tr> +<td>getLocalId()</td> +<td>get_local_id(0)</td> +</tr> +<tr> +<td>getLocalX()</td> +<td>get_local_id(0)</td> +</tr> +<tr> +<td>getLocalY()</td> +<td>get_local_id(1)</td> +</tr> +<tr> +<td>getLocalZ()</td> +<td>get_local_id(2)</td> +</tr> +<tr> +<td>getLocalSize()</td> +<td>get_local_size(0)</td> +</tr> +<tr> +<td>getLocalWidth()</td> +<td>get_local_size(0)</td> +</tr> +<tr> +<td>getLocalHeight()</td> +<td>get_local_size(1)</td> +</tr> +<tr> +<td>getLocalDepth()</td> +<td>get_local_size(2)</td> +</tr> +</tbody></table> + +<h2>An example</h2> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> 
<span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalX</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalY</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span class="o">==(</span><span class="n">getGlobalWidth</span><span class="o">()-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">getGlobalHeight</span><span class="o">()-</span><span class="mi">1</span><span class="o">)){</span> + <span class="kt">int</span> <span class="n">sum</span> <span 
class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> + <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalWidth</span><span class="o">()+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> <span class="o">}</span> - - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">,</span> <span class="n">HEIGHT</span><span class="o">);</span> - </code></pre> - <p>Or if we choose the Range class approach.</p> - <pre class="highlight java"><code> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> - <span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span 
class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> - <span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalX</span><span class="o">();</span> - <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalY</span><span class="o">();</span> - <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span class="o">==(</span><span class="n">getGlobalWidth</span><span class="o">()-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">getGlobalHeight</span><span class="o">()-</span><span class="mi">1</span><span class="o">)){</span> - <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span 
class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> - <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalWidth</span><span class="o">()+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> - <span class="o">}</span> - <span class="o">}</span> - <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalWidth</span><span class="o">()+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> - <span class="c1">// or out[getGlobalID()] = sum/9;</span> - <span class="o">}</span> + <span class="o">}</span> + <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalWidth</span><span class="o">()+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> + <span class="c1">// or out[getGlobalID()] = sum/9;</span> + <span class="o">}</span> + <span class="o">}</span> + +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">executeXY</span><span class="o">(</span><span class="n">WIDTH</span><span 
class="o">,</span> <span class="n">HEIGHT</span><span class="o">);</span> +</code></pre></div> +<p>Or if we choose the Range class approach.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">WIDTH</span><span class="o">=</span><span class="mi">128</span><span class="o">;</span> +<span class="kd">final</span> <span class="kd">static</span> <span class="kt">int</span> <span class="n">HEIGHT</span><span class="o">=</span><span class="mi">64</span><span class="o">;</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">in</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="kd">final</span> <span class="kt">int</span> <span class="n">out</span><span class="o">[]</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">WIDTH</span><span class="o">*</span><span class="n">HEIGHT</span><span class="o">];</span> +<span class="n">Kernel</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">x</span> <span class="o">=</span> <span class="n">getGlobalX</span><span class="o">();</span> + <span class="kt">int</span> <span class="n">y</span> <span class="o">=</span> <span class="n">getGlobalY</span><span class="o">();</span> + <span class="k">if</span> <span class="o">(!(</span><span class="n">x</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">x</span><span 
class="o">==(</span><span class="n">getGlobalWidth</span><span class="o">()-</span><span class="mi">1</span><span class="o">)</span> <span class="o">||</span> <span class="n">y</span><span class="o">==</span><span class="mi">1</span> <span class="o">||</span> <span class="n">y</span><span class="o">==(</span><span class="n">getGlobalHeight</span><span class="o">()-</span><span class="mi">1</span><span class="o">)){</span> + <span class="kt">int</span> <span class="n">sum</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dx</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dx</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dx</span><span class="o">++){</span> + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="n">dy</span> <span class="o">=-</span><span class="mi">1</span><span class="o">;</span> <span class="n">dy</span><span class="o"><</span><span class="mi">2</span><span class="o">;</span> <span class="n">dy</span><span class="o">++){</span> + <span class="n">sum</span><span class="o">+=</span><span class="n">in</span><span class="o">[(</span><span class="n">y</span><span class="o">+</span><span class="n">dy</span><span class="o">)*</span><span class="n">getGlobalWidth</span><span class="o">()+(</span><span class="n">x</span><span class="o">+</span><span class="n">dx</span><span class="o">)];</span> <span class="o">}</span> - - <span class="o">};</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range2D</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">,</span> <span class="n">HEIGHT</span><span class="o">));</span> - </code></pre> - 
<h2>Handling this from JTP mode</h2> - - <p>Mapping to OpenCL for this is all fairly straightforward.</p> - - <p>In Java JTP mode we will have to emulate this. For <code>get_global_id(0..3)</code> (<code>getGlobalX()</code>, <code>getGlobalY()</code> and <code>getGlobalZ()</code> using our proposed Aparapi Java mappings) we can of course easily offer reasonable implementations, this just requires the Java code to essentially nest 3 loops (or emulate) and set <code>globalX</code>, <code>globalY</code>, <code>globalZ</code> inside each nesting.</p> - - <p>For <code>get_local_size(0..3)</code> (<code>getLocalWidth()</code>, <code>getLocalHeight()</code> and <code>getLocalDepth()</code> using our proposed Aparapi Java mappings) we will need to break the globalWidth/globalHeight and globalDepth into some arbitrary equal ‘chunks’ (note I am avoiding using the word groups here to avoid confusion with <code>get_group_size(0..3)</code>!</p> - - <p>At present we always create a synthetic group in JTP mode which is the the # or cores. This will need to be changed. If the user requests a grid (64,64,8,8) (global width 64, global height 64, local width 8, local height 8) then we will have to create a JTP group of 64 (8x8) and just in case the kernel code contains a barrier, we will need to ensure we launch 64 threads for this group. From our experience it is best to launch one thread per core, so we may lose some JTP performance executing in this mode.</p> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> + <span class="o">}</span> + <span class="n">out</span><span class="o">[</span><span class="n">y</span><span class="o">*</span><span class="n">getGlobalWidth</span><span class="o">()+</span><span class="n">x</span><span class="o">]</span> <span class="o">=</span> <span class="n">sum</span><span class="o">/</span><span class="mi">9</span><span class="o">;</span> + <span class="c1">// or out[getGlobalID()] = sum/9;</span> + <span class="o">}</span> + <span class="o">}</span> + +<span class="o">};</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">Range2D</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">WIDTH</span><span class="o">,</span> <span class="n">HEIGHT</span><span class="o">));</span> +</code></pre></div> +<h2>Handling this from JTP mode</h2> + +<p>Mapping to OpenCL for this is all fairly straightforward.</p> + +<p>In Java JTP mode we will have to emulate this. 
For <code>get_global_id(0..2)</code> (<code>getGlobalX()</code>, <code>getGlobalY()</code> and <code>getGlobalZ()</code> using our proposed Aparapi Java mappings) we can of course easily offer reasonable implementations; this just requires the Java code to essentially nest 3 loops (or emulate) and set <code>globalX</code>, <code>globalY</code>, <code>globalZ</code> inside each nesting.</p> + +<p>For <code>get_local_size(0..2)</code> (<code>getLocalWidth()</code>, <code>getLocalHeight()</code> and <code>getLocalDepth()</code> using our proposed Aparapi Java mappings) we will need to break the globalWidth/globalHeight and globalDepth into some arbitrary equal ‘chunks’ (note I am avoiding using the word groups here to avoid confusion with <code>get_group_size(0..2)</code>!)</p> + +<p>At present we always create a synthetic group in JTP mode which is the number of cores. This will need to be changed. If the user requests a grid (64,64,8,8) (global width 64, global height 64, local width 8, local height 8) then we will have to create a JTP group of 64 (8x8) and just in case the kernel code contains a barrier, we will need to ensure we launch 64 threads for this group. From our experience it is best to launch one thread per core, so we may lose some JTP performance executing in this mode.</p> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/proposals/multiple-entry-points.html b/proposals/multiple-entry-points.html index 448b4b4e139494e24d8724a60bc31dcff4889677..51efe04df7a62a485fade652fd5c8d37c5ae572e 100644 --- a/proposals/multiple-entry-points.html +++ b/proposals/multiple-entry-points.html @@ -1,495 +1,496 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Multiple Entry Points - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li 
class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Multiple Entry Points</h1> - <div class='row center'> - <h4 class='header col s12 light center'>How to extend Aparapi to allow multiple entrypoints for kernels.</h4> - </div> - - </div> - - </div> - <div class='container'> - <h2>The Current Single Entrypoint World</h2> - - <p>At present Aparapi allows us to dispatch execution to a single ‘single entry point’ in a Kernel. Essentially for each Kernel only the overridden Kernel.run() method can be used to initiate execution on the GPU.</p> - - <p>Our canonical example is the 'Squarer’ Kernel which allows us to create squares for each element in an input array in an output array.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Overide</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> - <span class="n">out</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">};</span> - </code></pre> - <p>If we wanted a vector addition Kernel we would have to create a whole new Kernel.</p> - <pre class="highlight java"><code> - <span class="n">Kernel</span> <span class="n">adder</span> 
<span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> - <span class="nd">@Overide</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> - <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> - <span class="n">out</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">};</span> - </code></pre> - <p>For us to square and then add a constant we would have to invoke two kernels. Or of course create single SquarerAdder kernel.</p> - - <p>See this page EmulatingMultipleEntrypointsUsingCurrentAPI for ideas on how to emulate having multiple methods, by passing data to a single run() method.</p> - - <h2>Why can’t Aparapi just allow 'arbitary’ methods</h2> - - <p>Ideally we would just expose a more natural API, one which allows us to provide specific methods for each arithmetic operation.</p> - - <p>Essentially</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">();</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">();</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">();</span> - <span class="kd">public</span> <span class="kt">void</span> <span 
class="nf">sqrt</span><span class="o">();</span> - <span class="o">}</span> - </code></pre> - <p>Unfortunately this is hard to implement using Aparapi. There are two distinct problems, both at runtime.</p> - - <ul> - <li>How will Aparapi know which of the available methods we want to execute when we call Kernel.execute(range)?</li> - <li>On first execution how does Aparapi determine which methods might be entrypoints and are therefore need to be converted to OpenCL?</li> - </ul> - - <p>The first problem can be solved by extending Kernel.execute() to accept a method name</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="s">"add"</span><span class="o">);</span> - </code></pre> - <p>This is the obvious solution, but really causes maintenence issues int that it trades compile time reporting for a runtime errors. If a developer mistypes the name of the method, :-</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="s">"sadd"</span><span class="o">);</span> <span class="c1">// there is no such method</span> - </code></pre> - <p>The code will compile perfectly, only at runtime will we detect that there is no such method.</p> - - <h2>An aside</h2> - - <p>Maybe the new Java 8 method reference feature method might help here. 
In the paper below Brian Goetz talks about a double-colon syntax (Class::Method) for directly referencing a method which is presumably checked at compile time.</p> - - <p>So presumably</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="nl">VectorKernel:</span><span class="o">:</span><span class="n">add</span><span class="o">);</span> - </code></pre> - <p>Would compile just fine, whereby</p> - <pre class="highlight java"><code> - <span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="nl">VectorKernel:</span><span class="o">:</span><span class="n">sadd</span><span class="o">);</span> - </code></pre> - <p>Would yield a compile time error.</p> - - <p>See Brian Goetz’s excellent Lambda documentation</p> - - <h2>back from Aside</h2> - - <p>The second problem (knowing which methods need to be converted to OpenCL) can probably be solved using an Annotation.</p> - <pre class="highlight java"><code> - <span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">();</span> - <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">();</span> - <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">();</span> - <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">();</span> - <span class="kd">public</span> <span 
class="kt">void</span> <span class="nf">nonOpenCLMethod</span><span class="o">();</span> - <span class="o">}</span> - </code></pre> - <p>Here the @EntryPoint annotation allows the Aparapi runtime to determine which methods need to be exposed.</p> - - <h1>My Extension Proposal</h1> - - <p>Here is my proposal. Not only does it allow us to reference multiple entryoints, but I think it actually improves the single entrypoint API, albeit at the cost of being more verbose.</p> - - <h2>The developer must provide an API interface</h2> - - <p>First I propose that we should ask the developer to provide an interface for all methods that we wish to execute on the GPU (or convert to OpenCL).</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">VectorAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span> <span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - </code></pre> - <p>Note that each API takes a Range, this will make more sense in a moment.</p> - - <h2>The developer provides a bound implementation</h2> - - <p>Aparapi should provide a mechanism for mapping the proposed implementation of the API to it’s implementation.</p> - - <p>Note the 
weasel words here, this is not a conventional implementation of an interface. We will use an annotation (@Implements(Class class)) to provide the binding.</p> - <pre class="highlight java"><code> - <span class="nd">@Implements</span><span class="o">(</span><span class="n">VectorAPI</span><span class="o">.</span><span class="na">class</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Vector</span> <span class="kd">extends</span> <span class="n">Kernel</span> <span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">nonOpenCLMethod</span><span class="o">();</span> - <span class="o">}</span> - </code></pre> - <h2>Why we can’t the implementation just implement the interface?</h2> - - <p>This would be ideal. 
Sadly we need to intercept a call to say VectorAPI.add(Range) and dispatch to the resulting Vector.add(RangeId) instances. If you look at the signatures, the interface accepts a Range as it’s arg (the range over which we intend to execute) whereas the implementation (either called by JTP threads or GPU OpenCL dispatch) receives a RangeId (containing the unique globalId, localId, etc fields). At the very end of this page I show a strawman implementation of a sequential loop implementation.</p> - - <h2>So how do we get an implementation of VectorAPI</h2> - - <p>We instantiate our Kernel by creating an instance using new. We then ask this instance to create an API instance. Some presumably java.util.Proxy trickery will create an implementation of the actual instance, backed by the Java implementation.</p> - - <p>So execution would look something like.</p> - <pre class="highlight java"><code> - <span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> - <span class="n">VectorAPI</span> <span class="n">kernelApi</span> <span class="o">=</span> <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">();</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> - <span class="n">kernalApi</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>So the Vector instance is a pure Java implementation. 
The extracted API is the bridge to the GPU.</p> - - <p>Of course then we can also execute using an inline call through api()</p> - <pre class="highlight java"><code> - <span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">sqrt</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <p>or even expose api as public final fields</p> - <pre class="highlight java"><code> - <span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">.</span><span class="na">sqrt</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> - </code></pre> - <h2>How would our canonical 
Squarer example look</h2> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> - <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - - <span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">in</span><span class="o">[];</span> - <span class="kt">int</span> <span class="n">square</span><span class="o">[];</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span> - <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">}</span> - </code></pre> - <p>Then we execute using</p> - <pre class="highlight java"><code> - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> - <span class="c1">// fill squarer.in[SIZE]</span> - <span class="c1">// create squarer.values[SIZE];</span> - - - <span class="n">squarer</span><span 
class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">));</span> - </code></pre> - <h1>Extending this proposal to allow argument passing</h1> - - <p>Note that we have effectively replaced the use of the 'abstract’ squarer.execute(range) with the more concrete squarer.api().add(range).</p> - - <p>Now I would like to propose that we take one more step by allowing us to pass arguments to our methods.</p> - - <p>Normally Aparapi captures buffer and field accesses to create the args that it passes to the generated OpenCL code. In our canonical squarer example the <code>in[]</code> and <code>square[]</code> buffers are captured from the bytecode and passed (behind the scenes) to the OpenCL.</p> - - <p>However, by exposing the actual method we want to execute, we could also allow the API to accept parameters.</p> - - <p>So our squarer example would go from</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> - <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> - <span class="o">}</span> - - <span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">in</span><span class="o">[];</span> - <span class="kt">int</span> <span class="n">square</span><span class="o">[];</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span 
class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span> - <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">}</span> - - - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> - <span class="c1">// fill squarer.in[SIZE]</span> - <span class="c1">// create squarer.values[SIZE];</span> - - <span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">));</span> - </code></pre> - <p>to</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> - <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> - <span class="o">}</span> - - <span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span 
class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> - <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">}</span> - - - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> - - <span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span 
class="n">result</span><span class="o">);</span> - </code></pre> - <p>I think that this makes Aparapi look more conventional. It also allows us to allow overloading for the first time.</p> - <pre class="highlight java"><code> - <span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> - <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> - <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> - <span class="o">}</span> - - <span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> - <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span 
class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> - <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> - <span class="o">}</span> - <span class="o">}</span> - - - <span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> - - <span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span 
class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span class="n">result</span><span class="o">);</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">inf</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> - <span class="kt">float</span><span class="o">[]</span> <span class="n">squaref</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> - - <span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">inf</span><span class="o">,</span> <span class="n">resultf</span><span class="o">);</span> - </code></pre> - <p>test harness</p> - <pre class="highlight java"><code> - <span class="kn">import</span> <span class="nn">java.lang.reflect.InvocationHandler</span><span class="o">;</span> - <span class="kn">import</span> <span class="nn">java.lang.reflect.Method</span><span class="o">;</span> - <span class="kn">import</span> <span class="nn">java.lang.reflect.Proxy</span><span class="o">;</span> - - - <span class="kd">public</span> <span class="kd">class</span> <span class="nc">Ideal</span><span class="o">{</span> - - <span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">OpenCLInvocationHandler</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="kd">implements</span> <span class="n">InvocationHandler</span> <span class="o">{</span> - <span class="n">Object</span> <span class="n">instance</span><span class="o">;</span> - <span 
class="n">OpenCLInvocationHandler</span><span class="o">(</span><span class="n">Object</span> <span class="n">_instance</span><span class="o">){</span> - <span class="n">instance</span> <span class="o">=</span> <span class="n">_instance</span><span class="o">;</span> - <span class="o">}</span> - <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">Object</span> <span class="nf">invoke</span><span class="o">(</span><span class="n">Object</span> <span class="n">interfaceThis</span><span class="o">,</span> <span class="n">Method</span> <span class="n">interfaceMethod</span><span class="o">,</span> <span class="n">Object</span><span class="o">[]</span> <span class="n">interfaceArgs</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Throwable</span> <span class="o">{</span> - <span class="n">Class</span> <span class="n">clazz</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="na">getClass</span><span class="o">();</span> - - <span class="n">Class</span><span class="o">[]</span> <span class="n">argTypes</span> <span class="o">=</span> <span class="n">interfaceMethod</span><span class="o">.</span><span class="na">getParameterTypes</span><span class="o">();</span> - <span class="n">argTypes</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="n">RangeId</span><span class="o">.</span><span class="na">class</span><span class="o">;</span> - <span class="n">Method</span> <span class="n">method</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="na">getDeclaredMethod</span><span class="o">(</span><span class="n">interfaceMethod</span><span class="o">.</span><span class="na">getName</span><span class="o">(),</span> <span class="n">argTypes</span><span class="o">);</span> - - - <span class="k">if</span> <span class="o">(</span><span class="n">method</span> <span class="o">==</span> <span 
class="kc">null</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="s">"can't find method"</span><span class="o">);</span> - <span class="o">}</span><span class="k">else</span><span class="o">{</span> - <span class="n">RangeId</span> <span class="n">rangeId</span> <span class="o">=</span> <span class="k">new</span> <span class="n">RangeId</span><span class="o">((</span><span class="n">Range</span><span class="o">)</span><span class="n">interfaceArgs</span><span class="o">[</span><span class="mi">0</span><span class="o">]);</span> - <span class="n">interfaceArgs</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="n">rangeId</span><span class="o">;</span> - <span class="k">for</span> <span class="o">(</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span> <span class="o"><</span><span class="n">rangeId</span><span class="o">.</span><span class="na">r</span><span class="o">.</span><span class="na">width</span><span class="o">;</span> <span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">++){</span> - <span class="n">method</span><span class="o">.</span><span class="na">invoke</span><span class="o">(</span><span class="n">instance</span><span class="o">,</span> <span class="n">interfaceArgs</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="k">return</span> <span class="kc">null</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> - <span 
class="kt">int</span> <span class="n">width</span><span class="o">;</span> - <span class="n">Range</span><span class="o">(</span><span class="kt">int</span> <span class="n">_width</span><span class="o">)</span> <span class="o">{</span> - <span class="n">width</span> <span class="o">=</span> <span class="n">_width</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range2D</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">{</span> - <span class="kt">int</span> <span class="n">height</span><span class="o">;</span> - - <span class="n">Range2D</span><span class="o">(</span><span class="kt">int</span> <span class="n">_width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">_height</span><span class="o">)</span> <span class="o">{</span> - <span class="kd">super</span><span class="o">(</span><span class="n">_width</span><span class="o">);</span> - <span class="n">height</span> <span class="o">=</span> <span class="n">_height</span><span class="o">;</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range1DId</span><span class="o"><</span><span class="n">T</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">>{</span> - <span class="n">Range1DId</span><span class="o">(</span><span class="n">T</span> <span class="n">_r</span><span class="o">){</span> - <span class="n">r</span> <span class="o">=</span> <span class="n">_r</span><span class="o">;</span> - <span class="o">}</span> - <span class="n">T</span> <span class="n">r</span><span class="o">;</span> - - <span class="kt">int</span> <span class="n">wgid</span><span class="o">,</span> <span class="n">wlid</span><span class="o">,</span> <span class="n">wgsize</span><span class="o">,</span> <span class="n">wlsize</span><span 
class="o">,</span> <span class="n">wgroup</span><span class="o">;</span> - <span class="o">}</span> - - <span class="kd">static</span> <span class="kd">class</span> <span class="nc">RangeId</span> <span class="kd">extends</span> <span class="n">Range1DId</span><span class="o"><</span><span class="n">Range</span><span class="o">>{</span> - <span class="n">RangeId</span><span class="o">(</span><span class="n">Range</span> <span class="n">r</span><span class="o">){</span> - <span class="kd">super</span><span class="o">(</span><span class="n">r</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range2DId</span> <span class="kd">extends</span> <span class="n">Range1DId</span><span class="o"><</span><span class="n">Range2D</span><span class="o">>{</span> - <span class="n">Range2DId</span><span class="o">(</span><span class="n">Range2D</span> <span class="n">r</span><span class="o">){</span> - <span class="kd">super</span><span class="o">(</span><span class="n">r</span><span class="o">);</span> - <span class="o">}</span> - - <span class="kt">int</span> <span class="n">hgid</span><span class="o">,</span> <span class="n">hlid</span><span class="o">,</span> <span class="n">hgsize</span><span class="o">,</span> <span class="n">hlsize</span><span class="o">,</span> <span class="n">hgroup</span><span class="o">;</span> - <span class="o">}</span> - - - - - - <span class="kd">static</span> <span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">T</span> <span class="nf">create</span><span class="o">(</span><span class="n">Object</span> <span class="n">_instance</span><span class="o">,</span> <span class="n">Class</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">_interface</span><span class="o">)</span> <span class="o">{</span> - <span class="n">OpenCLInvocationHandler</span><span 
class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">invocationHandler</span> <span class="o">=</span> <span class="k">new</span> <span class="n">OpenCLInvocationHandler</span><span class="o"><</span><span class="n">T</span><span class="o">>(</span><span class="n">_instance</span><span class="o">);</span> - <span class="n">T</span> <span class="n">instance</span> <span class="o">=</span> <span class="o">(</span><span class="n">T</span><span class="o">)</span> <span class="n">Proxy</span><span class="o">.</span><span class="na">newProxyInstance</span><span class="o">(</span><span class="n">Ideal</span><span class="o">.</span><span class="na">class</span><span class="o">.</span><span class="na">getClassLoader</span><span class="o">(),</span> <span class="k">new</span> <span class="n">Class</span><span class="o">[]</span> <span class="o">{</span> - <span class="n">_interface</span><span class="o">,</span> - - <span class="o">},</span> <span class="n">invocationHandler</span><span class="o">);</span> - <span class="k">return</span> <span class="o">(</span><span class="n">instance</span><span class="o">);</span> - - <span class="o">}</span> - - - - <span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Squarer</span><span class="o">{</span> - <span class="kd">interface</span> <span class="nc">API</span> <span class="o">{</span> - <span class="kd">public</span> <span class="n">API</span> <span class="nf">foo</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> - <span class="kd">public</span> <span class="n">Squarer</span> <span class="nf">dispatch</span><span class="o">();</span> - - <span class="o">}</span> - - <span 
class="kd">public</span> <span class="n">API</span> <span class="nf">foo</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> - <span class="n">out</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">];</span> - <span class="k">return</span><span class="o">(</span><span class="kc">null</span><span class="o">);</span> - <span class="o">}</span> - <span class="o">}</span> - - <span class="cm">/** - * @param args - */</span> - <span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> - - <span class="n">Squarer</span><span class="o">.</span><span class="na">API</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">create</span><span class="o">(</span><span class="k">new</span> <span class="n">Squarer</span><span class="o">(),</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">API</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="k">new</span> <span 
class="kt">int</span><span class="o">[]</span> <span class="o">{</span> - <span class="mi">1</span><span class="o">,</span> - <span class="mi">2</span><span class="o">,</span> - <span class="mi">3</span><span class="o">,</span> - <span class="mi">4</span><span class="o">,</span> - <span class="mi">5</span><span class="o">,</span> - <span class="mi">6</span> - <span class="o">};</span> - <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> - <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Range</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> - - <span class="n">squarer</span><span class="o">.</span><span class="na">foo</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">);</span> - - <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="nl">s:</span><span class="n">out</span><span class="o">){</span> - <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="n">s</span><span class="o">);</span> - <span class="o">}</span> - - <span class="o">}</span> - - <span class="o">}</span> - </code></pre> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Multiple Entry Points +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Multiple Entry Points</h1> +<div class='row center'> +<h4 class='header col s12 light center'>How to extend Aparapi to allow multiple entrypoints for kernels.</h4> +</div> + +</div> + +</div> +<div 
class='container'> +<h2>The Current Single Entrypoint World</h2> + +<p>At present Aparapi allows us to dispatch execution to a single ‘entry point’ in a Kernel. Essentially, for each Kernel, only the overridden Kernel.run() method can be used to initiate execution on the GPU.</p> + +<p>Our canonical example is the ‘Squarer’ Kernel, which stores the square of each element of an input array in an output array.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> + <span class="n">out</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">};</span> +</code></pre></div> +<p>If we wanted a vector addition Kernel we would have to create a whole new Kernel.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Kernel</span> <span class="n">adder</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Kernel</span><span class="o">(){</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">run</span><span class="o">(){</span> + <span class="kt">int</span> <span class="n">id</span> <span class="o">=</span> <span
class="n">getGlobalId</span><span class="o">(</span><span class="mi">0</span><span class="o">);</span> + <span class="n">out</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">]</span> <span class="o">*</span> <span class="n">in</span><span class="o">[</span><span class="n">id</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">};</span> +</code></pre></div> +<p>For us to square and then add a constant we would have to invoke two kernels. Or, of course, create a single SquarerAdder kernel.</p> + +<p>See the <a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a> page for ideas on how to emulate having multiple methods by passing data to a single run() method.</p> + +<h2>Why can’t Aparapi just allow ‘arbitrary’ methods?</h2> + +<p>Ideally we would just expose a more natural API, one which allows us to provide specific methods for each arithmetic operation.</p> + +<p>Essentially</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">();</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">();</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">();</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">();</span> +<span class="o">}</span> +</code></pre></div> +<p>Unfortunately this is hard to implement using Aparapi.
There are two distinct problems, both at runtime.</p> + +<ul> +<li>How will Aparapi know which of the available methods we want to execute when we call Kernel.execute(range)?</li> +<li>On first execution how does Aparapi determine which methods might be entrypoints and therefore need to be converted to OpenCL?</li> +</ul> + +<p>The first problem can be solved by extending Kernel.execute() to accept a method name:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="s">"add"</span><span class="o">);</span> +</code></pre></div> +<p>This is the obvious solution, but it causes maintenance issues in that it trades compile-time error reporting for runtime errors. If a developer mistypes the name of the method:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="s">"sadd"</span><span class="o">);</span> <span class="c1">// there is no such method</span> +</code></pre></div> +<p>The code will compile perfectly; only at runtime will we detect that there is no such method.</p> + +<h2>An aside</h2> + +<p>Maybe the new Java 8 method reference feature might help here. 
In the paper referenced below, Brian Goetz describes a double-colon syntax (Class::Method) for directly referencing a method, which is presumably checked at compile time.</p> + +<p>So presumably</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="nl">VectorKernel:</span><span class="o">:</span><span class="n">add</span><span class="o">);</span> +</code></pre></div> +<p>would compile just fine, whereas</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">kernel</span><span class="o">.</span><span class="na">execute</span><span class="o">(</span><span class="n">SIZE</span><span class="o">,</span> <span class="nl">VectorKernel:</span><span class="o">:</span><span class="n">sadd</span><span class="o">);</span> +</code></pre></div> +<p>would yield a compile-time error.</p> + +<p>See Brian Goetz’s excellent Lambda documentation.</p> + +<h2>Back from the aside</h2> + +<p>The second problem (knowing which methods need to be converted to OpenCL) can probably be solved using an annotation.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">class</span> <span class="nc">VectorKernel</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">();</span> + <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">();</span> + <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">();</span> + <span class="nd">@EntryPoint</span> <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span 
class="o">();</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">nonOpenCLMethod</span><span class="o">();</span> +<span class="o">}</span> +</code></pre></div> +<p>Here the @EntryPoint annotation allows the Aparapi runtime to determine which methods need to be exposed.</p> + +<h1>My Extension Proposal</h1> + +<p>Here is my proposal. Not only does it allow us to reference multiple entrypoints, but I think it actually improves the single entrypoint API, albeit at the cost of being more verbose.</p> + +<h2>The developer must provide an API interface</h2> + +<p>First I propose that we should ask the developer to provide an interface for all methods that we wish to execute on the GPU (or convert to OpenCL).</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">VectorAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span> <span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> +<span class="o">}</span> +</code></pre></div> +<p>Note that each API method takes a Range; this will make more sense in a moment.</p> + +<h2>The developer provides a bound implementation</h2> + +<p>Aparapi should provide a mechanism for mapping the 
proposed API to its implementation.</p> + +<p>Note the weasel words here: this is not a conventional implementation of an interface. We will use an annotation (@Implements(Class class)) to provide the binding.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="nd">@Implements</span><span class="o">(</span><span class="n">VectorAPI</span><span class="o">.</span><span class="na">class</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Vector</span> <span class="kd">extends</span> <span class="n">Kernel</span> <span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">add</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sub</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqrt</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">sqr</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span><span class="cm">/*implementation here */</span><span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">nonOpenCLMethod</span><span class="o">();</span> +<span class="o">}</span> +</code></pre></div> +<h2>Why can’t the implementation just implement the 
interface?</h2> + +<p>This would be ideal. Sadly we need to intercept a call to, say, VectorAPI.add(Range) and dispatch to the resulting Vector.add(RangeId) instances. If you look at the signatures, the interface accepts a Range as its argument (the range over which we intend to execute) whereas the implementation (either called by JTP threads or GPU OpenCL dispatch) receives a RangeId (containing the unique globalId, localId, etc. fields). At the very end of this page I show a strawman sequential-loop implementation.</p> + +<h2>So how do we get an implementation of VectorAPI</h2> + +<p>We instantiate our Kernel by creating an instance using new. We then ask this instance to create an API instance. Some (presumably java.lang.reflect.Proxy) trickery will create an implementation of the actual instance, backed by the Java implementation.</p> + +<p>So execution would look something like this:</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> +<span class="n">VectorAPI</span> <span class="n">kernelApi</span> <span class="o">=</span> <span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">();</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> +<span class="n">kernelApi</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>So the Vector instance is a pure Java implementation. 
The extracted API is the bridge to the GPU.</p> + +<p>Of course then we can also execute using an inline call through api()</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">sqrt</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +</code></pre></div> +<p>or even expose api as public final fields</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Vector</span> <span class="n">kernel</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Vector</span><span class="o">();</span> +<span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">.</span><span class="na">add</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> +<span class="n">kernel</span><span class="o">.</span><span class="na">api</span><span class="o">.</span><span class="na">sqrt</span><span class="o">(</span><span class="n">range</span><span class="o">);</span> 
+</code></pre></div> +<h2>How would our canonical Squarer example look</h2> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> + <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> +<span class="o">}</span> + +<span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">in</span><span class="o">[];</span> + <span class="kt">int</span> <span class="n">square</span><span class="o">[];</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span> + <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">}</span> +</code></pre></div> +<p>Then we execute using</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> +<span class="c1">// fill squarer.in[SIZE]</span> +<span class="c1">// 
create squarer.values[SIZE];</span> + + +<span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">));</span> +</code></pre></div> +<h1>Extending this proposal to allow argument passing</h1> + +<p>Note that we have effectively replaced the use of the ‘abstract’ squarer.execute(range) with the more concrete squarer.api().add(range).</p> + +<p>Now I would like to propose that we take one more step by allowing us to pass arguments to our methods.</p> + +<p>Normally Aparapi captures buffer and field accesses to create the args that it passes to the generated OpenCL code. In our canonical squarer example the <code>in[]</code> and <code>square[]</code> buffers are captured from the bytecode and passed (behind the scenes) to the OpenCL.</p> + +<p>However, by exposing the actual method we want to execute, we could also allow the API to accept parameters.</p> + +<p>So our squarer example would go from</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> + <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">);</span> +<span class="o">}</span> + +<span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">in</span><span class="o">[];</span> + <span class="kt">int</span> <span class="n">square</span><span class="o">[];</span> + <span 
class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">){</span> + <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">}</span> + + +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> +<span class="c1">// fill squarer.in[SIZE]</span> +<span class="c1">// create squarer.values[SIZE];</span> + +<span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">));</span> +</code></pre></div> +<p>to</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> + <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> +<span class="o">}</span> + +<span 
class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> + <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">}</span> + + +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> +<span class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> +<span class="kt">int</span><span class="o">[]</span> <span class="n">square</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> + +<span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span 
class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span class="n">result</span><span class="o">);</span> +</code></pre></div> +<p>I think that this makes Aparapi look more conventional. It also allows us to allow overloading for the first time.</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kd">interface</span> <span class="nc">SquarerAPI</span> <span class="kd">extends</span> <span class="n">AparapiAPI</span><span class="o">{</span> + <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> + <span class="n">square</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">square</span><span class="o">);</span> +<span class="o">}</span> + +<span class="nd">@Implement</span><span class="o">(</span><span class="n">SquarerAPI</span><span class="o">)</span> <span class="kd">class</span> <span class="nc">Squarer</span> <span class="kd">extends</span> <span class="n">Kernel</span><span class="o">{</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> + <span class="n">square</span><span 
class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> + <span class="o">}</span> + <span class="kd">public</span> <span class="kt">void</span> <span class="nf">square</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">float</span><span class="o">[]</span> <span class="n">square</span><span class="o">){</span> + <span class="n">square</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">gid</span><span class="o">];</span> + <span class="o">}</span> +<span class="o">}</span> + + +<span class="n">Squarer</span> <span class="n">squarer</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Squarer</span><span class="o">();</span> +<span class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> +<span class="kt">int</span><span class="o">[]</span> <span class="n">square</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> + +<span class="n">squarer</span><span 
class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">in</span><span class="o">,</span> <span class="n">result</span><span class="o">);</span> +<span class="kt">float</span><span class="o">[]</span> <span class="n">inf</span> <span class="o">=</span> <span class="c1">// create and fill squarer.in[SIZE]</span> +<span class="kt">float</span><span class="o">[]</span> <span class="n">squaref</span> <span class="o">=</span> <span class="c1">// create squarer.values[SIZE];</span> + +<span class="n">squarer</span><span class="o">.</span><span class="na">api</span><span class="o">().</span><span class="na">square</span><span class="o">(</span><span class="n">Range</span><span class="o">.</span><span class="na">create</span><span class="o">(</span><span class="n">SIZE</span><span class="o">),</span> <span class="n">inf</span><span class="o">,</span> <span class="n">resultf</span><span class="o">);</span> +</code></pre></div> +<p>test harness</p> +<div class="highlight"><pre class="highlight java"><code> +<span class="kn">import</span> <span class="nn">java.lang.reflect.InvocationHandler</span><span class="o">;</span> +<span class="kn">import</span> <span class="nn">java.lang.reflect.Method</span><span class="o">;</span> +<span class="kn">import</span> <span class="nn">java.lang.reflect.Proxy</span><span class="o">;</span> + + +<span class="kd">public</span> <span class="kd">class</span> <span class="nc">Ideal</span><span class="o">{</span> + + <span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">OpenCLInvocationHandler</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="kd">implements</span> <span class="n">InvocationHandler</span> <span 
class="o">{</span> + <span class="n">Object</span> <span class="n">instance</span><span class="o">;</span> + <span class="n">OpenCLInvocationHandler</span><span class="o">(</span><span class="n">Object</span> <span class="n">_instance</span><span class="o">){</span> + <span class="n">instance</span> <span class="o">=</span> <span class="n">_instance</span><span class="o">;</span> + <span class="o">}</span> + <span class="nd">@Override</span> <span class="kd">public</span> <span class="n">Object</span> <span class="nf">invoke</span><span class="o">(</span><span class="n">Object</span> <span class="n">interfaceThis</span><span class="o">,</span> <span class="n">Method</span> <span class="n">interfaceMethod</span><span class="o">,</span> <span class="n">Object</span><span class="o">[]</span> <span class="n">interfaceArgs</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Throwable</span> <span class="o">{</span> + <span class="n">Class</span> <span class="n">clazz</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="na">getClass</span><span class="o">();</span> + + <span class="n">Class</span><span class="o">[]</span> <span class="n">argTypes</span> <span class="o">=</span> <span class="n">interfaceMethod</span><span class="o">.</span><span class="na">getParameterTypes</span><span class="o">();</span> + <span class="n">argTypes</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="n">RangeId</span><span class="o">.</span><span class="na">class</span><span class="o">;</span> + <span class="n">Method</span> <span class="n">method</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="na">getDeclaredMethod</span><span class="o">(</span><span class="n">interfaceMethod</span><span class="o">.</span><span class="na">getName</span><span class="o">(),</span> <span class="n">argTypes</span><span class="o">);</span> + + + 
<span class="k">if</span> <span class="o">(</span><span class="n">method</span> <span class="o">==</span> <span class="kc">null</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="s">"can't find method"</span><span class="o">);</span> + <span class="o">}</span><span class="k">else</span><span class="o">{</span> + <span class="n">RangeId</span> <span class="n">rangeId</span> <span class="o">=</span> <span class="k">new</span> <span class="n">RangeId</span><span class="o">((</span><span class="n">Range</span><span class="o">)</span><span class="n">interfaceArgs</span><span class="o">[</span><span class="mi">0</span><span class="o">]);</span> + <span class="n">interfaceArgs</span><span class="o">[</span><span class="mi">0</span><span class="o">]=</span><span class="n">rangeId</span><span class="o">;</span> + <span class="k">for</span> <span class="o">(</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span> <span class="o">=</span> <span class="mi">0</span><span class="o">;</span> <span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span> <span class="o"><</span><span class="n">rangeId</span><span class="o">.</span><span class="na">r</span><span class="o">.</span><span class="na">width</span><span class="o">;</span> <span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">++){</span> + <span class="n">method</span><span class="o">.</span><span class="na">invoke</span><span class="o">(</span><span class="n">instance</span><span class="o">,</span> <span class="n">interfaceArgs</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">}</span> + + <span class="k">return</span> <span class="kc">null</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">}</span> + + <span 
class="kd">static</span> <span class="kd">class</span> <span class="nc">Range</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">width</span><span class="o">;</span> + <span class="n">Range</span><span class="o">(</span><span class="kt">int</span> <span class="n">_width</span><span class="o">)</span> <span class="o">{</span> + <span class="n">width</span> <span class="o">=</span> <span class="n">_width</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">}</span> + + <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range2D</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">{</span> + <span class="kt">int</span> <span class="n">height</span><span class="o">;</span> + + <span class="n">Range2D</span><span class="o">(</span><span class="kt">int</span> <span class="n">_width</span><span class="o">,</span> <span class="kt">int</span> <span class="n">_height</span><span class="o">)</span> <span class="o">{</span> + <span class="kd">super</span><span class="o">(</span><span class="n">_width</span><span class="o">);</span> + <span class="n">height</span> <span class="o">=</span> <span class="n">_height</span><span class="o">;</span> + <span class="o">}</span> + <span class="o">}</span> + + <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range1DId</span><span class="o"><</span><span class="n">T</span> <span class="kd">extends</span> <span class="n">Range</span><span class="o">>{</span> + <span class="n">Range1DId</span><span class="o">(</span><span class="n">T</span> <span class="n">_r</span><span class="o">){</span> + <span class="n">r</span> <span class="o">=</span> <span class="n">_r</span><span class="o">;</span> + <span class="o">}</span> + <span class="n">T</span> <span class="n">r</span><span class="o">;</span> + + <span class="kt">int</span> <span class="n">wgid</span><span class="o">,</span> <span class="n">wlid</span><span 
class="o">,</span> <span class="n">wgsize</span><span class="o">,</span> <span class="n">wlsize</span><span class="o">,</span> <span class="n">wgroup</span><span class="o">;</span> + <span class="o">}</span> + + <span class="kd">static</span> <span class="kd">class</span> <span class="nc">RangeId</span> <span class="kd">extends</span> <span class="n">Range1DId</span><span class="o"><</span><span class="n">Range</span><span class="o">>{</span> + <span class="n">RangeId</span><span class="o">(</span><span class="n">Range</span> <span class="n">r</span><span class="o">){</span> + <span class="kd">super</span><span class="o">(</span><span class="n">r</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">}</span> + + <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Range2DId</span> <span class="kd">extends</span> <span class="n">Range1DId</span><span class="o"><</span><span class="n">Range2D</span><span class="o">>{</span> + <span class="n">Range2DId</span><span class="o">(</span><span class="n">Range2D</span> <span class="n">r</span><span class="o">){</span> + <span class="kd">super</span><span class="o">(</span><span class="n">r</span><span class="o">);</span> + <span class="o">}</span> + + <span class="kt">int</span> <span class="n">hgid</span><span class="o">,</span> <span class="n">hlid</span><span class="o">,</span> <span class="n">hgsize</span><span class="o">,</span> <span class="n">hlsize</span><span class="o">,</span> <span class="n">hgroup</span><span class="o">;</span> + <span class="o">}</span> + + + + + + <span class="kd">static</span> <span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">T</span> <span class="nf">create</span><span class="o">(</span><span class="n">Object</span> <span class="n">_instance</span><span class="o">,</span> <span class="n">Class</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span 
class="n">_interface</span><span class="o">)</span> <span class="o">{</span> + <span class="n">OpenCLInvocationHandler</span><span class="o"><</span><span class="n">T</span><span class="o">></span> <span class="n">invocationHandler</span> <span class="o">=</span> <span class="k">new</span> <span class="n">OpenCLInvocationHandler</span><span class="o"><</span><span class="n">T</span><span class="o">>(</span><span class="n">_instance</span><span class="o">);</span> + <span class="n">T</span> <span class="n">instance</span> <span class="o">=</span> <span class="o">(</span><span class="n">T</span><span class="o">)</span> <span class="n">Proxy</span><span class="o">.</span><span class="na">newProxyInstance</span><span class="o">(</span><span class="n">Ideal</span><span class="o">.</span><span class="na">class</span><span class="o">.</span><span class="na">getClassLoader</span><span class="o">(),</span> <span class="k">new</span> <span class="n">Class</span><span class="o">[]</span> <span class="o">{</span> + <span class="n">_interface</span><span class="o">,</span> + + <span class="o">},</span> <span class="n">invocationHandler</span><span class="o">);</span> + <span class="k">return</span> <span class="o">(</span><span class="n">instance</span><span class="o">);</span> + + <span class="o">}</span> + + + + <span class="kd">public</span> <span class="kd">static</span> <span class="kd">class</span> <span class="nc">Squarer</span><span class="o">{</span> + <span class="kd">interface</span> <span class="nc">API</span> <span class="o">{</span> + <span class="kd">public</span> <span class="n">API</span> <span class="nf">foo</span><span class="o">(</span><span class="n">Range</span> <span class="n">range</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">);</span> + <span class="kd">public</span> <span 
class="n">Squarer</span> <span class="nf">dispatch</span><span class="o">();</span> + + <span class="o">}</span> + + <span class="kd">public</span> <span class="n">API</span> <span class="nf">foo</span><span class="o">(</span><span class="n">RangeId</span> <span class="n">rangeId</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">in</span><span class="o">,</span> <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span><span class="o">)</span> <span class="o">{</span> + <span class="n">out</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">]</span> <span class="o">=</span> <span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">]*</span><span class="n">in</span><span class="o">[</span><span class="n">rangeId</span><span class="o">.</span><span class="na">wgid</span><span class="o">];</span> + <span class="k">return</span><span class="o">(</span><span class="kc">null</span><span class="o">);</span> + <span class="o">}</span> + <span class="o">}</span> + + <span class="cm">/** + * @param args + */</span> + <span class="kd">public</span> <span class="kd">static</span> <span class="kt">void</span> <span class="nf">main</span><span class="o">(</span><span class="n">String</span><span class="o">[]</span> <span class="n">args</span><span class="o">)</span> <span class="o">{</span> + + <span class="n">Squarer</span><span class="o">.</span><span class="na">API</span> <span class="n">squarer</span> <span class="o">=</span> <span class="n">create</span><span class="o">(</span><span class="k">new</span> <span class="n">Squarer</span><span class="o">(),</span> <span class="n">Squarer</span><span class="o">.</span><span class="na">API</span><span class="o">.</span><span class="na">class</span><span class="o">);</span> + <span 
class="kt">int</span><span class="o">[]</span> <span class="n">in</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[]</span> <span class="o">{</span> + <span class="mi">1</span><span class="o">,</span> + <span class="mi">2</span><span class="o">,</span> + <span class="mi">3</span><span class="o">,</span> + <span class="mi">4</span><span class="o">,</span> + <span class="mi">5</span><span class="o">,</span> + <span class="mi">6</span> + <span class="o">};</span> + <span class="kt">int</span><span class="o">[]</span> <span class="n">out</span> <span class="o">=</span> <span class="k">new</span> <span class="kt">int</span><span class="o">[</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">];</span> + <span class="n">Range</span> <span class="n">range</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Range</span><span class="o">(</span><span class="n">in</span><span class="o">.</span><span class="na">length</span><span class="o">);</span> + + <span class="n">squarer</span><span class="o">.</span><span class="na">foo</span><span class="o">(</span><span class="n">range</span><span class="o">,</span> <span class="n">in</span><span class="o">,</span> <span class="n">out</span><span class="o">);</span> + + <span class="k">for</span> <span class="o">(</span><span class="kt">int</span> <span class="nl">s:</span><span class="n">out</span><span class="o">){</span> + <span class="n">System</span><span class="o">.</span><span class="na">out</span><span class="o">.</span><span class="na">println</span><span class="o">(</span><span class="n">s</span><span class="o">);</span> + <span class="o">}</span> + + <span class="o">}</span> + +<span class="o">}</span> +</code></pre></div> +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p 
class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJ
affLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/showcase.html b/showcase.html index 2459c2c10ed7dc37750591854fcee7e6362fc124..7612a1d2f72820901532f30d2fcc1cc2cbff1879 100644 --- a/showcase.html +++ b/showcase.html @@ -1,156 +1,158 @@ <!DOCTYPE html> <html lang='en'> - <head> - <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> - <meta content='width=device-width, initial-scale=1' name='viewport'> - <meta content='IE=edge' http-equiv='X-UA-Compatible'> - <meta content='no' name='msapplication-tap-highlight'> - <meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> - <title> - Aparapi | Showcase - </title> - <!-- Favicons --> - <link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> - <meta content='#FFFFFF' name='msapplication-TileColor'> - <meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> - <link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> - <!-- Android 5 Chrome Color --> - <meta content='#EE6E73' name='theme-color'> - <!-- CSS --> - <link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> - <link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> - </head> - <body> - <header> - <div class='container'> - <a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> - <i class='material-icons'>menu</i> - </a> - </div> - <ul class='side-nav fixed' id='nav-mobile'> - <li class='logo'> - <a class='brand-logo' href='/' id='logo-container'> - <object data='/images/logo.svg' 
id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> - </a> - </li> - <li class='search'> - <div class='search-wrapper card'> - <input id='search'> - <i class='material-icons'>search</i> - <div class='search-results'></div> - </div> - </li> - <li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li 
class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a 
href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> - </ul> - </header> - <main> - <div class='section no-pad-bot' 
id='index-banner'> - <div class='container'> - <h1 class='header center-on-small-only'>Showcase</h1> - <div class='row center'> - <h4 class='header col s12 light center'>Products powered by Aparapi.</h4> - </div> - - </div> - - </div> - <div class='container'> - <div class='container section'> - <div class='row'> - <div class='center col s12'> - <br> - <br> - <a class='waves-effect waves-light btn' href='mailto:aparapi@syncleus.com?subject=showcase'>Submit your site</a> - <br> - <br> - </div> - </div> - <br> - <div class='row'> - <div class='col s12 m4'> - <img class='materialboxed responsive-img z-depth-1' data-caption='Carnivore Pixel' src='images/showcase/carnivore_pixel.gif'> - <h5 class='center'> - <a href='http://edumo.net/wp/carnivore-pixel/' target='_blank'>Carnivore Pixel</a> - </h5> - </div> - <div class='col s12 m4'> - <img class='materialboxed responsive-img z-depth-1' data-caption='DayFlower Engine' src='images/showcase/dayflower_engine.png'> - <h5 class='center'> - <a href='http://www.dayflower.org/' target='_blank'>DayFlower Engine</a> - </h5> - </div> - <div class='col s12 m4'> - <img class='materialboxed responsive-img z-depth-1' data-caption='Gaggle Mail' src='images/showcase/openrc.png'> - <h5 class='center'> - <a href='https://github.com/macroing/OpenRC' target='_blank'>Open Ray Caster</a> - </h5> - </div> - </div> - <div class='row'> - <div class='col s12 m4'> - <img class='materialboxed responsive-img z-depth-1' data-caption='Nextome' src='images/showcase/nextome.jpg'> - <h5 class='center'> - <a href='http://www.nextome.net/' target='_blank'>Nextome</a> - </h5> - </div> - </div> - </div> - </div> - </main> - <footer class='page-footer'> - <div class='container'> - <div class='row'> - <div class='col l4 s12'> - <h5 class='white-text'>Help Aparapi Grow</h5> - <p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> - <form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> - <input name='cmd' type='hidden' value='_s-xclick'> - <input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA
1UECBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7-----'> - <button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> - Donate Now - </button> - </form> - </div> - <div class='col l4 s12'> - <h5 class='white-text'>Join the Discussion</h5> - <p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> - <a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' target='_blank'>Chat</a> - </div> - <div class='col l4 s12' style='overflow: hidden;'> - <h5 class='white-text'>Connect</h5> - <iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='http://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' width='170'></iframe> - <br> - <a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> - <br> - <div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> - </div> - </div> - </div> - <div class='footer-copyright'> - <div class='container'> - © 2016-2017 Syncleus, All rights reserved. - <a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> - </div> - </div> - </footer> - <!-- Scripts --> - <script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> - <script> - if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } - </script> - <script src='/javascripts/jquery.timeago.js'></script> - <script src='/javascripts/materialize.min.js'></script> - <script src='/javascripts/lunr.min.js'></script> - <script src='/javascripts/search.js'></script> - <script src='/javascripts/materialize.js'></script> - <script src='/javascripts/init.js'></script> - <!-- Twitter Button --> - <script> - !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); - 
</script> - <!-- Google Plus Button --> - <script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> - </body> +<head> +<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'> +<meta content='width=device-width, initial-scale=1' name='viewport'> +<meta content='IE=edge' http-equiv='X-UA-Compatible'> +<meta content='no' name='msapplication-tap-highlight'> +<meta content='Aparapi is an Open-source framework for executing native Java code on the GPU, developed by Syncleus.' name='description'> +<title> +Aparapi | Showcase +</title> +<!-- Favicons --> +<link href='/images/favicon/apple-touch-icon-152x152.png' rel='apple-touch-icon-precomposed'> +<meta content='#FFFFFF' name='msapplication-TileColor'> +<meta content='/images/favicon/mstile-144x144.png' name='msapplication-TileImage'> +<link href='/images/favicon/favicon-32x32.png' rel='icon' sizes='32x32'> +<!-- Android 5 Chrome Color --> +<meta content='#EE6E73' name='theme-color'> +<!-- CSS --> +<link href='/stylesheets/highlight.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='/stylesheets/style.css' media='screen,projection' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/css?family=Inconsolata' rel='stylesheet' type='text/css'> +<link href='http://fonts.googleapis.com/icon?family=Material+Icons' rel='stylesheet'> +</head> +<body> +<header> +<div class='container'> +<a class='button-collapse top-nav waves-effect waves-light circle hide-on-large-only' data-activates='nav-mobile' href='#'> +<i class='material-icons'>menu</i> +</a> +</div> +<ul class='side-nav fixed' id='nav-mobile'> +<li class='logo'> +<a class='brand-logo' href='/' id='logo-container'> +<object data='/images/logo.svg' id='front-page-logo' type='image/svg+xml'>Your browser does not support SVG</object> +</a> +</li> +<li class='search'> +<div class='search-wrapper card'> +<input id='search'> +<i class='material-icons'>search</i> +<div 
class='search-results'></div> +</div> +</li> +<li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/">Overview</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Introduction</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/about.html">About</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/getting-started.html">Getting Started</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/introduction/faq.html">FAQ</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Documentation</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/aparapi-patterns.html">Aparapi Patterns</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/choosing-specific-devices.html">Choosing Specific Devices</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/converting-java-to-opencl.html">Converting Java to OpenCL</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/emulating-multiple-entrypoints.html">Emulating Multiple Entrypoints</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/explicit-buffer-handling.html">Explicit Buffer Handling</a></li></ul></li><li class='no-padding'><ul class='collapsible 
collapsible-accordion'><li class='bold'><a href="/documentation/hsa-enabled-lambda.html">HSA Enabled Lambda</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/kernel-guidelines.html">Kernel Guidelines</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/library-agent-duality.html">Library Agent Duality</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/new-features.html">New Features</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/opencl-bindings.html">OpenCL Bindings</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/private-memory-space.html">Private Memory Space</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/profiling-the-kernel.html">Profiling the Kernel</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/setting-up-hsa.html">Setting Up HSA</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/unit-tests.html">Unit Tests</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/using-hsa-simulator.html">Using HSA Simulator</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/constant-memory.html">Constant Memory</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/local-memory.html">Local Memory</a></li></ul></li><li class='no-padding'><ul 
class='collapsible collapsible-accordion'><li class='bold'><a href="/documentation/multiple-dim-ranges.html">Multiple Dim Ranges</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a class='collapsible-header waves-effect waves-teal'>Proposals</a><div class='collapsible-body'><ul><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-dim-nd-range.html">Multiple Dim ND Range</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambdas.html">Lambdas</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/address-space-with-buffers.html">Address Space with Buffers</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/extensions.html">Extensions</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/device.html">Device</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/multiple-entry-points.html">Multiple Entry Points</a></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/proposals/lambda-syntax.html">Lambda Syntax</a></li></ul></li></ul></div></li></ul></li><li class='no-padding'><ul class='collapsible collapsible-accordion'><li class='bold'><a href="/showcase.html">Showcase</a></li></ul></li> +</ul> +</header> +<main> +<div class='section no-pad-bot' id='index-banner'> +<div class='container'> +<h1 class='header center-on-small-only'>Showcase</h1> +<div class='row center'> +<h4 class='header col s12 light center'>Products powered by Aparapi.</h4> +</div> + +</div> + +</div> +<div class='container'> +<div class='container 
section'> +<div class='row'> +<div class='center col s12'> +<br> +<br> +<a class='waves-effect waves-light btn' href='mailto:aparapi@syncleus.com?subject=showcase'>Submit your site</a> +<br> +<br> +</div> +</div> +<br> +<div class='row'> <!-- Showcase grid: each card pairs a materialboxed image (caption taken from data-caption) with a link to the project; alt and data-caption should match the link text --> +<div class='col s12 m4'> +<img alt='Carnivore Pixel' class='materialboxed responsive-img z-depth-1' data-caption='Carnivore Pixel' src='images/showcase/carnivore_pixel.gif'> +<h5 class='center'> +<a href='http://edumo.net/wp/carnivore-pixel/' rel='noopener' target='_blank'>Carnivore Pixel</a> +</h5> +</div> +<div class='col s12 m4'> +<img alt='DayFlower Engine' class='materialboxed responsive-img z-depth-1' data-caption='DayFlower Engine' src='images/showcase/dayflower_engine.png'> +<h5 class='center'> +<a href='http://www.dayflower.org/' rel='noopener' target='_blank'>DayFlower Engine</a> +</h5> +</div> +<div class='col s12 m4'> +<img alt='Open Ray Caster' class='materialboxed responsive-img z-depth-1' data-caption='Open Ray Caster' src='images/showcase/openrc.png'> +<h5 class='center'> +<a href='https://github.com/macroing/OpenRC' rel='noopener' target='_blank'>Open Ray Caster</a> +</h5> +</div> +</div> +<div class='row'> +<div class='col s12 m4'> +<img alt='Nextome' class='materialboxed responsive-img z-depth-1' data-caption='Nextome' src='images/showcase/nextome.jpg'> +<h5 class='center'> +<a href='http://www.nextome.net/' rel='noopener' target='_blank'>Nextome</a> +</h5> +</div> +</div> +</div> + +</div> +</main> +<footer class='page-footer'> +<div class='container'> +<div class='row'> +<div class='col l4 s12'> +<h5 class='white-text'>Help Aparapi Grow</h5> +<p class='grey-text text-lighten-4'>We are a team of volunteers working on this project like it's our full time job. 
Any amount would help support and continue development on this project and is greatly appreciated.</p> +<form action='https://www.paypal.com/cgi-bin/webscr' id='paypal-donate' method='post' target='_top'> +<input name='cmd' type='hidden' value='_s-xclick'> +<input name='encrypted' type='hidden' value='-----BEGIN PKCS7-----MIIHoAYJKoZIhvcNAQcEoIIHkTCCB40CAQExggEwMIIBLAIBADCBlDCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20CAQAwDQYJKoZIhvcNAQEBBQAEgYATcKxN8t35TG2x34eY272SuZO3QbGy+BTGIM5DRV6Hmosotzw2TF42ceWmbXb3Gk4Wy5kUgo4TgHExCZHUSlHUl+A9KWLFejotgQJPhbiBsnns3klWbKftA3LEnP/kz/SW7OyBlpluoHoEGb354/aoX3JEctp3akHiZEmD7JyEgjELMAkGBSsOAwIaBQAwggEcBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECOGCJwba6JICgIH4RtE1LE3juagKs+swI5tb9Y2LacWo+qn1H1aLKeg57bQMqqcWYvkoO1joYoglPc1h4mO0egZjHPQ6ih0K0IYlXw2SRpNylSlIMUE3GW6smjSSwRhscZfXQYUnmQsfYvkFwoKrlZGf/1u0Q7nwlZ1szIKnDMZ5f+k8xBcM0sMNutn/y9CH6A3zo01gQBIF29+1WYAoQspNAnfWQy3ydV7nbjIA9ThDp2WquWw3EVlvqlvm/3C2AFuH/L4q0ltn3qjkCdzXK0O2jW3TRrzligPkAy6CN0Tw2jGW5GENNC1L92vHFH4kBXUPlhvw39TgoN7/KRUjVoYPYgugggOHMIIDgzCCAuygAwIBAgIBADANBgkqhkiG9w0BAQUFADCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wHhcNMDQwMjEzMTAxMzE1WhcNMzUwMjEzMTAxMzE1WjCBjjELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtQYXlQYWwgSW5jLjETMBEGA1UECxQKbGl2ZV9jZXJ0czERMA8GA1UEAxQIbGl2ZV9hcGkxHDAaBgkqhkiG9w0BCQEWDXJlQHBheXBhbC5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAMFHTt38RMxLXJyO2SmS+Ndl72T7oKJ4u4uw+6awntALWh03PewmIJuzbALScsTS4sZoS1fKciBGoh11gIfHzylvkdNe/hJl66/RGqrj5rFb08sAABNTzDTiqqNpJeBsYs/c2aiGozptX2RlnBktH+SUNpAajW724Nv2Wvhif6sFAgMBAAGjge4wgeswHQYDVR0OBBYEFJaffLvGbxe9WT9S1wob7BDWZJRrMIG7BgNVHSMEgbMwgbCAFJaffLvGbxe9WT9S1wob7BDWZJRroYGUpIGRMIGOMQswCQYDVQQGEwJVUzELMAkGA1UE
CBMCQ0ExFjAUBgNVBAcTDU1vdW50YWluIFZpZXcxFDASBgNVBAoTC1BheVBhbCBJbmMuMRMwEQYDVQQLFApsaXZlX2NlcnRzMREwDwYDVQQDFAhsaXZlX2FwaTEcMBoGCSqGSIb3DQEJARYNcmVAcGF5cGFsLmNvbYIBADAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBQUAA4GBAIFfOlaagFrl71+jq6OKidbWFSE+Q4FqROvdgIONth+8kSK//Y/4ihuE4Ymvzn5ceE3S/iBSQQMjyvb+s2TWbQYDwcp129OPIbD9epdr4tJOUNiSojw7BHwYRiPh58S1xGlFgHFXwrEBb3dgNbMUa+u4qectsMAXpVHnD9wIyfmHMYIBmjCCAZYCAQEwgZQwgY4xCzAJBgNVBAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLUGF5UGFsIEluYy4xEzARBgNVBAsUCmxpdmVfY2VydHMxETAPBgNVBAMUCGxpdmVfYXBpMRwwGgYJKoZIhvcNAQkBFg1yZUBwYXlwYWwuY29tAgEAMAkGBSsOAwIaBQCgXTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcBMBwGCSqGSIb3DQEJBTEPFw0xNjExMjkyMjA1NTNaMCMGCSqGSIb3DQEJBDEWBBS4i3Exr/pFcKOJy8uKmH+nGIMjqDANBgkqhkiG9w0BAQEFAASBgDAbFZ2jieloeB/0wCAcvYCFAIXmmBaMS5js/byzU5gK7exSTlRMX74IkmHemItaOcw3wyFlu4i118D9K9SbSbFiX9DGDcezGh42u/6G8TuZMwlvmiehwMioTVcm4jWG40YLiv8pJZypfoSx2w4IAFb4na5i/E1qOrwQOpiBho+s-----END PKCS7----- +'> +<button alt='PayPal - The safer, easier way to pay online!' class='btn waves-effect waves-light red lighten-3' name='action' type='submit'> +Donate Now +</button> +</form> +</div> +<div class='col l4 s12'> +<h5 class='white-text'>Join the Discussion</h5> +<p class='grey-text text-lighten-4'>We have a Gitter chat room set up where you can talk directly with us. 
Come in and discuss new features, future goals, general problems or questions, or anything else you can think of.</p> +<a class='btn waves-effect waves-light red lighten-3' href='https://gitter.im/Syncleus/aparapi' rel='noopener' target='_blank'>Chat</a> +</div> +<div class='col l4 s12' style='overflow: hidden;'> +<h5 class='white-text'>Connect</h5> +<iframe allowtransparency='true' frameborder='0' height='30' scrolling='0' src='https://ghbtns.com/github-btn.html?user=Syncleus&repo=aparapi&type=watch&count=true&size=large' title='Watch Syncleus/aparapi on GitHub' width='170'></iframe> <!-- Loaded over HTTPS so the button is not blocked as mixed content when the page itself is served over HTTPS --> +<br> +<a class='twitter-follow-button' data-dnt='true' data-show-count='true' data-size='large' href='https://twitter.com/AparapiLib'>Follow @AparapiLib</a> +<br> +<div class='g-follow' data-annotation='bubble' data-height='24' data-href='https://plus.google.com/102266131584900704956' data-rel='publisher'></div> +</div> +</div> +</div> +<div class='footer-copyright'> +<div class='container'> +© 2016-2017 Syncleus, All rights reserved. +<a class='grey-text text-lighten-4 right' href='https://github.com/Syncleus/aparapi/blob/master/LICENSE'>Apache License v2</a> +</div> +</div> +</footer> +<!-- Scripts --> +<script src='https://code.jquery.com/jquery-2.1.4.min.js'></script> +<script> + if (!window.jQuery) { document.write('<script src="bin/jquery-2.1.1.min.js"><\/script>'); } +</script> <!-- NOTE(review): the local fallback is jQuery 2.1.1 at a page-relative path while the CDN copy is 2.1.4; confirm the path and version are intended --> +<script src='/javascripts/jquery.timeago.js'></script> +<script src='/javascripts/materialize.min.js'></script> +<script src='/javascripts/lunr.min.js'></script> +<script src='/javascripts/search.js'></script> +<script src='/javascripts/materialize.js'></script> <!-- NOTE(review): materialize.min.js is already included above; confirm this second include is intentional --> +<script src='/javascripts/init.js'></script> +<!-- Twitter Button --> +<script> + !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); +</script> +<!-- Google Plus Button --> 
+<script async='' defer='defer' src='https://apis.google.com/js/platform.js'></script> +</body> </html> diff --git a/stylesheets/highlight.css b/stylesheets/highlight.css index 3c3bded7e2886cc1ccddd719e1493dde180846d2..609fc9ef5748d54964b9b7b56bd4b0e39a11a09d 100644 --- a/stylesheets/highlight.css +++ b/stylesheets/highlight.css @@ -8,6 +8,10 @@ color: #dee5e7; background-color: #4e5d62; } +.highlight .gp { + color: #a8e1fe; + font-weight: bold; +} .highlight .c, .highlight .cd, .highlight .cm, .highlight .c1, .highlight .cs { color: #6c8b9f; font-style: italic; @@ -22,8 +26,9 @@ background-color: #cc0000; } .highlight .gr { - color: #fefeec; - background-color: #cc0000; + color: #cc0000; + font-weight: bold; + font-style: italic; } .highlight .k, .highlight .kd, .highlight .kv { color: #f6dd62; @@ -31,6 +36,7 @@ } .highlight .o, .highlight .ow { color: #4df4ff; + font-weight: bold; } .highlight .p, .highlight .pi { color: #4df4ff; @@ -149,6 +155,9 @@ .highlight .w { color: #BBBBBB; } +.highlight .go { + color: #BBBBBB; +} .highlight .nf { color: #a8e1fe; }