Developer Reference

Migrating OpenCL™ FPGA Designs to SYCL*

ID 767849
Date 5/08/2024
Public

OpenCL Sample Code

host.cpp File

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

#include "AOCLUtils/aocl_utils.h"
#include "CL/opencl.h"

#include "constants.h"

using namespace aocl_utils;

static const char* kernel_name = "histogram";
static const char* aocx_name = "histogram";

static cl_platform_id platform;
static cl_device_id device;
static cl_context context;
static cl_command_queue queue;
static cl_kernel kernel;
static cl_program program;
static cl_int status;

static void freeResources() {
  if(kernel)
    clReleaseKernel(kernel);
  if(program)
    clReleaseProgram(program);
  if(queue)
    clReleaseCommandQueue(queue);
  if(context)
    clReleaseContext(context);
}

void cleanup() {
  freeResources();
}

void context_error_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) {
  printf("Error message in callback: %s\n", errinfo);
}

int main(int argc, char *argv[]) {
  // parse command line args
  uint count = 1000000;
  if (argc > 1) {
    count = atoi(argv[1]);
  }

  int *in_h = (int *) malloc(count * sizeof(int));
  int *bins_h = (int *) malloc(K_NUM_BINS * sizeof(int));
  int *bins_ref_h = (int *) malloc(K_NUM_BINS * sizeof(int));

  for (size_t i = 0; i < count; i++) {
    in_h[i] = rand() % 100;
  }

  for (size_t i = 0; i < K_NUM_BINS; i++) {
    bins_ref_h[i] = 0;
  }

  for (size_t i = 0; i < count; i++) {
    bins_ref_h[in_h[i] % K_NUM_BINS]++;
  }

  cl_uint num_platforms;
  cl_uint num_devices;

  if(!setCwdToExeDir()) {
    return false;
  }
 
#ifdef FPGA_EMULATOR
    platform = findPlatform("Intel(R) FPGA Emulation Platform for OpenCL(TM)");
#else 
    platform = findPlatform("Intel(R) FPGA SDK for OpenCL(TM)");
#endif

  if(platform == NULL) {
    printf("ERROR: Unable to find Intel(R) FPGA OpenCL platform.\n");
    return -1;
  }

  status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, num_devices);
  checkError(status, "Failed clGetDeviceIDs.");

  context = clCreateContext(0, num_devices, &device, &context_error_callback,
                            NULL, &status);
  checkError(status, "Failed clCreateContext.");

  queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &status);
  checkError(status, "Failed clCreateCommandQueue.");

  std::string binary_file = getBoardBinaryFile(aocx_name, device);
  program = createProgramFromBinary(context, binary_file.c_str(), &device, num_devices);
  checkError(status, "Failed clCreateProgramWithBinary.");

  status = clBuildProgram(program, 0, NULL, "", NULL, NULL);
  checkError(status, "Failed clBuildProgram.");

  kernel = clCreateKernel(program, kernel_name, &status);
  checkError(status, "Failed clCreateKernel.");

  cl_mem in_d = clCreateBuffer(context, CL_MEM_READ_WRITE, count * sizeof(int), NULL, &status);
  checkError(status, "clCreateBuffer in_d");
  cl_mem bins_d = clCreateBuffer(context, CL_MEM_READ_WRITE, K_NUM_BINS * sizeof(int), NULL, &status);
  checkError(status, "clCreateBuffer bins_d");

  status = clEnqueueWriteBuffer(queue, in_d, CL_TRUE, 0, count * sizeof(int), in_h, 0, NULL, NULL);
  checkError(status, "clEnqueueWriteBuffer failed for in_d");

  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&in_d);
  checkError(status, "Failed to set kernel arg 0");
  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&bins_d);
  checkError(status, "Failed to set kernel arg 1");
  status = clSetKernelArg(kernel, 2, sizeof(cl_uint), (void*)&count);
  checkError(status, "Failed to set kernel arg 2");

  status = clEnqueueTask(queue, kernel, 0, NULL, NULL);
  checkError(status, "Failed to launch kernel.");

  checkError(status, "clFinish failed");

  status = clEnqueueReadBuffer(queue, bins_d, CL_TRUE, 0, K_NUM_BINS * sizeof(int), bins_h, 0, NULL, NULL);
  checkError(status, "clEnqueueReadBuffer failed");
  
  int passed = 1;
  for (int i = 0; i < K_NUM_BINS; i++) {
    if (bins_h[i] != bins_ref_h[i]) {
      passed = 0;
    }
  }

  if (passed) {
    printf("PASSED\n");
  } else {
    printf("FAILED\n");
  }
  
  clReleaseMemObject(in_d);
  clReleaseMemObject(bins_d);

  freeResources();
  free(in_h);
  free(bins_h);
  free(bins_ref_h);

  return passed;
}

histogram.cl File

#include "constants.h"

__kernel void histogram(__global int *restrict in,
                        __global int *restrict bins,
                        uint count) {
  // store a local copy of the histogram to avoid read-accumulate-writes
  // to global memory
  __attribute__((register)) int bins_local[K_NUM_BINS];

  // initialize the local bins
  #pragma unroll
  for (uint i = 0; i < K_NUM_BINS; i++) {
    bins_local[i] = 0;
  }

  // compute the histogram
  #pragma ii 1
  for (uint i = 0; i < count; i++) {
    bins_local[in[i] % K_NUM_BINS]++;
  }
  
  // write back the local copy to global memory
  #pragma unroll
  for (uint i = 0; i < K_NUM_BINS; i++) {
    bins[i] = bins_local[i];
  }
}

constants.h File

#ifndef __CONSTANTS_H__
#define __CONSTANTS_H__

#define K_NUM_BINS 10

#endif /* __CONSTANTS_H__ */

Makefile

CXX=g++
BOARD=intel_a10gx_pac:pac_a10

device_emu: histogram.cl
	aoc -march=emulator histogram.cl -o histogram.aocx

device_report: histogram.cl
	aoc -rtl histogram.cl -report

device_fpga: histogram.cl
	aoc -board=$(BOARD) histogram.cl -o histogram.aocx

host_emu: host.cpp
	$(CXX) -L${INTELFPGAOCLSDKROOT}/host/linux64/lib/ -lOpenCL -I. -DFPGA_EMULATOR host.cpp ./AOCLUtils/opencl.cpp -o host

host_fpga: host.cpp
	$(CXX) -L${INTELFPGAOCLSDKROOT}/host/linux64/lib/ -lOpenCL -I. host.cpp ./AOCLUtils/opencl.cpp -o host

clean:
	rm -rf *.o *.a *.prj *.fpga_emu *.fpga *.aocx *.aocr *.aoco *.temp host