Spawn Sync Java, JOCL, PRAMs




CS255

Chris Pollett

Feb 20, 2019

Outline

Introduction

A Simple Multithreaded Program

Let A = [0, 0] be a global array of length 2.

A = [0,0]
SpawnSync(true, 0)

SpawnSync(parent, location):
1 if(parent):
      spawn SpawnSync(false, location + 1)
2 A[location] = location + 1
3 sync 
4 if(parent):
     for i =0 to A.length - 1: print A[i]

The above code should output:

1
2

Java Threads

A Simple Spawn Sync Example in Java

import java.lang.Thread;

/**
 * Java rendering of the spawn/sync pseudocode using raw threads.
 *
 * The root thread (the one constructed with a null spawner) spawns a single
 * child, both threads write their own slot of the shared array {@code a},
 * the root waits for the child via {@link #sync()}, and then the root prints
 * the array.
 *
 * Completion is signalled through a private lock object rather than the
 * Thread's own monitor, so the hand-off does not depend on the notification
 * the JVM performs on a Thread object when it terminates.
 */
public class SpawnSyncDemo extends Thread
{
    /**
     * @param spawner  the thread that spawned this one, or null for the root
     * @param location index into the shared array a that this thread writes
     */
    public SpawnSyncDemo(SpawnSyncDemo spawner, int location)
    {
        this.location = location;
        this.spawner = spawner;
        done = false;
    }

    /**
     * Thread body: optionally spawn a child, write a[location], signal
     * completion, wait for the child (if any) and, in the root, print a.
     */
    @Override
    public void run()
    {
        SpawnSyncDemo child = null;
        if(spawner == null)
        {
            child = new SpawnSyncDemo(this, location + 1);
            child.start();
        }
        a[location] = location + 1;
        // Publish the write above and wake any thread blocked in sync().
        markDone();
        if(child != null)
        {
            child.sync();
        }
        if(spawner == null)
        {
            for(int i = 0; i < a.length; i++)
            {
                System.out.println(a[i]);
            }
        }
    }

    /**
     * Blocks the caller until this thread has finished its share of the
     * work (i.e. until markDone() has run). Returns immediately if it has
     * already finished.
     *
     * If the caller is interrupted while waiting, its interrupt status is
     * restored and the method returns early without the work being
     * guaranteed complete.
     */
    public void sync()
    {
        synchronized(lock)
        {
            // Loop, not if: wait() may wake up spuriously.
            while(!done)
            {
                try
                {
                    lock.wait();
                }
                catch(InterruptedException ie)
                {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }

    /**
     * Records that this thread's work is complete and wakes every waiter.
     * Setting the flag under the same lock that sync() reads it under makes
     * the flag, and the array write that preceded it, visible to the waiter.
     */
    private void markDone()
    {
        synchronized(lock)
        {
            done = true;
            lock.notifyAll();
        }
    }

    public static void main(String args[])
    {
        SpawnSyncDemo parent = new SpawnSyncDemo(null, 0);
        parent.start();
    }

    int location;
    SpawnSyncDemo spawner;
    // Guarded by lock.
    boolean done;
    private final Object lock = new Object();
    static int a[] = {0, 0};
}

Thread Sum Demo

import java.lang.Thread;

/**
 * Divide-and-conquer sum of the shared array {@code a} using raw threads.
 *
 * Each SumDemo thread is responsible for the inclusive index range
 * [low, high]. A range of one element is summed directly; a larger range is
 * split in half, each half is handed to a freshly spawned thread, and the
 * two partial sums are added once both children have been synced. The root
 * thread (null spawner) prints the total.
 *
 * Completion is signalled through a private lock object rather than the
 * Thread's own monitor, so the hand-off does not depend on the notification
 * the JVM performs on a Thread object when it terminates.
 */
public class SumDemo extends Thread
{
    /**
     * @param spawner the thread that spawned this one, or null for the root
     * @param low     first index (inclusive) of the range to sum
     * @param high    last index (inclusive) of the range to sum; a value
     *                below low denotes an empty range whose sum is 0
     */
    public SumDemo(SumDemo spawner, int low, int high)
    {
        this.high = high;
        this.low = low;
        this.spawner = spawner;
        done = false;
        c = 0;
    }

    /**
     * Thread body: computes the sum of a[low..high] into c, signals
     * completion, and prints c if this is the root thread.
     */
    @Override
    public void run()
    {
        if(low > high)
        {
            // Empty range (e.g. an empty array): nothing to add.
            c = 0;
        }
        else if(low == high)
        {
            c = a[low];
        }
        else
        {
            // Written this way so the midpoint cannot overflow int.
            int mid = low + (high - low) / 2;
            SumDemo firstHalf = new SumDemo(this, low, mid);
            firstHalf.start();
            SumDemo secondHalf = new SumDemo(this, mid + 1, high);
            secondHalf.start();
            firstHalf.sync();
            secondHalf.sync();
            c = firstHalf.c + secondHalf.c;
        }
        // Publish c and wake any thread blocked in sync().
        markDone();
        if(spawner == null)
        {
            System.out.println(c);
        }
    }

    /**
     * Blocks the caller until this thread has stored its partial sum in c
     * (i.e. until markDone() has run). Returns immediately if that has
     * already happened.
     *
     * If the caller is interrupted while waiting, its interrupt status is
     * restored and the method returns early without c being guaranteed
     * final.
     */
    public void sync()
    {
        synchronized(lock)
        {
            // Loop, not if: wait() may wake up spuriously.
            while(!done)
            {
                try
                {
                    lock.wait();
                }
                catch(InterruptedException ie)
                {
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }

    /**
     * Records that c is final and wakes every waiter. Setting the flag under
     * the same lock that sync() reads it under makes both the flag and the
     * preceding write to c visible to the waiter.
     */
    private void markDone()
    {
        synchronized(lock)
        {
            done = true;
            lock.notifyAll();
        }
    }

    public static void main(String args[])
    {
        SumDemo parent = new SumDemo(null, 0, a.length - 1);
        parent.start();
    }

    int low;
    int high;
    int c = 0;
    SumDemo spawner;
    // Guarded by lock.
    boolean done;
    private final Object lock = new Object();
    static int a[] = {4, 3, 9, -1, 1};

}

In-Class Exercise

OpenCL

Java OpenCL Example

/*
 * JOCL - Java bindings for OpenCL
 * 
 * Copyright 2009 Marco Hutter - http://www.jocl.org/
 */


import static org.jocl.CL.*;

import org.jocl.*;

/**
 * A small JOCL sample.
 *
 * The host program fills two float arrays of length n, runs an OpenCL kernel
 * that multiplies them element by element on the first device of the first
 * platform, reads the product back and compares it with the same product
 * computed in Java.
 */
public class JOCLSample
{
    /**
     * The source code of the OpenCL program to execute.
     *
     * Each work-item looks up its global id in dimension 0 and stores
     * a[gid] * b[gid] into c[gid].
     */
    private static String programSource =
        "__kernel void "+
        "sampleKernel(__global const float *a,"+
        "             __global const float *b,"+
        "             __global float *c)"+
        "{"+
        "    int gid = get_global_id(0);"+
        "    c[gid] = a[gid] * b[gid];"+
        "}";
    

    /**
     * The entry point of this sample
     * 
     * Prints "Test PASSED" or "Test FAILED" depending on whether the values
     * read back from the device match the host-side products, and, when n is
     * at most 10, also prints the result array.
     * 
     * @param args Not used
     */
    public static void main(String args[])
    {
        // Create input- and output data 
        // Both inputs hold the values 0..n-1, so the expected output is i*i.
        int n = 10;
        float srcArrayA[] = new float[n];
        float srcArrayB[] = new float[n];
        float dstArray[] = new float[n];
        for (int i=0; i<n; i++)
        {
            srcArrayA[i] = i;
            srcArrayB[i] = i;
        }
        // Wrap the Java arrays so they can be handed to the OpenCL calls
        Pointer srcA = Pointer.to(srcArrayA);
        Pointer srcB = Pointer.to(srcArrayB);
        Pointer dst = Pointer.to(dstArray);

        // The platform, device type and device number
        // that will be used
        final int platformIndex = 0;
        final long deviceType = CL_DEVICE_TYPE_ALL;
        final int deviceIndex = 0;

        // Enable exceptions and subsequently omit error checks in this sample
        CL.setExceptionsEnabled(true);

        // Obtain the number of platforms
        // (first call passes no output array and only asks for the count)
        int numPlatformsArray[] = new int[1];
        clGetPlatformIDs(0, null, numPlatformsArray);
        int numPlatforms = numPlatformsArray[0];

        // Obtain a platform ID
        cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
        clGetPlatformIDs(platforms.length, platforms, null);
        cl_platform_id platform = platforms[platformIndex];

        // Initialize the context properties
        cl_context_properties contextProperties = new cl_context_properties();
        contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
        
        // Obtain the number of devices for the platform
        // (same count-then-fetch pattern as for the platforms above)
        int numDevicesArray[] = new int[1];
        clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
        int numDevices = numDevicesArray[0];
        
        // Obtain a device ID 
        cl_device_id devices[] = new cl_device_id[numDevices];
        clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
        cl_device_id device = devices[deviceIndex];

        // Create a context for the selected device
        cl_context context = clCreateContext(
            contextProperties, 1, new cl_device_id[]{device}, 
            null, null, null);
        
        /* Create a command-queue for the selected device
           This syntax is now deprecated in OpenCL 2 in favor of
           cl_queue_properties cmd_props = new cl_queue_properties();
           int[] errors = new int[1];
           cl_command_queue commandQueue =
               clCreateCommandQueueWithProperties(context, device, cmd_props,
               errors);
            However, the new syntax requires native compilation I have not done
         */
        cl_command_queue commandQueue = 
            clCreateCommandQueue(context, device, 0, null);

        // Allocate the memory objects for the input- and output data
        // memObjects[0] and [1] are read-only buffers initialised by copying
        // from srcA and srcB; memObjects[2] is a read-write buffer created
        // without a host pointer and receives the kernel's output.
        cl_mem memObjects[] = new cl_mem[3];
        memObjects[0] = clCreateBuffer(context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            Sizeof.cl_float * n, srcA, null);
        memObjects[1] = clCreateBuffer(context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            Sizeof.cl_float * n, srcB, null);
        memObjects[2] = clCreateBuffer(context, 
            CL_MEM_READ_WRITE, 
            Sizeof.cl_float * n, null, null);
        
        // Create the program from the source code
        cl_program program = clCreateProgramWithSource(context,
            1, new String[]{ programSource }, null, null);
        
        // Build the program
        clBuildProgram(program, 0, null, null, null, null);
        
        // Create the kernel
        // (the name must match the __kernel function in programSource)
        cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
        
        // Set the arguments for the kernel
        // Argument indices 0, 1, 2 correspond to the kernel parameters a, b, c.
        clSetKernelArg(kernel, 0, 
            Sizeof.cl_mem, Pointer.to(memObjects[0]));
        clSetKernelArg(kernel, 1, 
            Sizeof.cl_mem, Pointer.to(memObjects[1]));
        clSetKernelArg(kernel, 2, 
            Sizeof.cl_mem, Pointer.to(memObjects[2]));
        
        // Set the work-item dimensions
        // n work-items in total (one per array element), in groups of size 1.
        long global_work_size[] = new long[]{n};
        long local_work_size[] = new long[]{1};
        
        // Execute the kernel
        // (a one-dimensional range, with no global offset and no wait list)
        clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
            global_work_size, local_work_size, 0, null, null);
        
        // Read the output data
        // CL_TRUE is passed in the blocking-read position; per the OpenCL
        // clEnqueueReadBuffer contract dstArray should therefore be filled
        // when this call returns.
        clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
            n * Sizeof.cl_float, dst, 0, null, null);
        
        // Release kernel, program, and memory objects
        // (and finally the command queue and the context)
        clReleaseMemObject(memObjects[0]);
        clReleaseMemObject(memObjects[1]);
        clReleaseMemObject(memObjects[2]);
        clReleaseKernel(kernel);
        clReleaseProgram(program);
        clReleaseCommandQueue(commandQueue);
        clReleaseContext(context);
        
        // Verify the result
        // Each device value x is compared with the host product y using a
        // relative tolerance: |x - y| must not exceed epsilon * |x|.
        boolean passed = true;
        final float epsilon = 1e-7f;
        for (int i=0; i<n; i++)
        {
            float x = dstArray[i];
            float y = srcArrayA[i] * srcArrayB[i];
            boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x);
            if (!epsilonEqual)
            {
                passed = false;
                break;
            }
        }
        System.out.println("Test "+(passed?"PASSED":"FAILED"));
        // Only dump the array when it is small enough to read comfortably
        if (n <= 10)
        {
            System.out.println("Result: "+java.util.Arrays.toString(dstArray));
        }
    }
}

OpenCL Compute Units and Processing Elements

OpenCL Kinds of Memory and Barriers