Developer Reference

Migrating OpenCL™ FPGA Designs to SYCL*

ID 767849
Date 5/08/2024
Public

SYCL Sample Code

main.cpp File

#include <algorithm>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <numeric>
#include <vector>

#include <sycl/sycl.hpp>
#include <sycl/ext/intel/fpga_extensions.hpp>

// Bring the SYCL API into scope so the sample can refer to queue, buffer,
// accessor, handler, etc. without the sycl:: prefix.
using namespace sycl;

// the number of bins in the histogram is constant
// (it sizes the host result arrays and bounds the kernel's unrolled bin loops)
constexpr int kNumBins = 10;

// Forward declare the kernel names in the global scope to reduce name mangling
// ('histogram' is the name given to the single_task kernel launched in main)
class histogram;

int main(int argc, char* argv[]) {
  // parse command line args
  uint count = 1000000;
  if (argc > 1) {
    count = atoi(argv[1]);
  }

  // host input and output memory
  std::vector<int> in_h(count);
  std::array<int, kNumBins> bins_h = {0};
  std::array<int, kNumBins> bins_ref_h = {0};

  // generate random input and compute the expected result
  std::generate(in_h.begin(), in_h.end(), [] { return rand() % 100; });
  for (auto& x : in_h) { bins_ref_h[x % kNumBins]++; };

    // the device selector
#ifdef FPGA_EMULATOR
  ext::intel::fpga_emulator_selector selector;
#else
  ext::intel::fpga_selector selector;
#endif

  // create the device queue
  queue q(selector);

  try {
    // create SYCL buffers for inputs and outputs
    // providing host pointers (in this case, std::vectors and std::array)
    // allows the runtime to automatically migrate input and output data
    // to and from the device on demand
    buffer in_buf(in_h);
    buffer bins_buf(bins_h);

    // launch the kernel
    event kernel_event = q.submit([&](handler& h) {
      // get accessors to the SYCL buffers
      // 'no_init' tells the runtime that we don't care about the initial
      // contents of the output (z) and avoids copying the output from host to
      // device before launching the kernel.
      accessor in(in_buf, h, read_only);
      accessor bins(bins_buf, h, write_only, no_init);
      
      h.single_task<histogram>([=]() [[intel::kernel_args_restrict]] {
        // store a local copy of the histogram to avoid read-accumulate-writes
        // to global memory
        [[intel::fpga_register]] int bins_local[kNumBins];

        // initialize the local bins
        #pragma unroll
        for (uint i = 0; i < kNumBins; i++) {
          bins_local[i] = 0;
        }
        
        // compute the histogram
        [[intel::initiation_interval(1)]]
        for (uint i = 0; i < count; i++) {
          bins_local[in[i] % kNumBins]++;
        }
        
        // write back the local copy to global memory
        #pragma unroll
        for (uint i = 0; i < kNumBins; i++) {
          bins[i] = bins_local[i];
        }
      });
    });
  } catch (exception const& e) {
    std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n";
    std::terminate();
  }

  // Exiting the try-catch scope will cause the buffer destructors to be called
  // which will result in an implicit 'wait' on the kernel to finish (since
  // the kernel uses the buffers).
  // Therefore, at this point in the code, we know the kernel has finished
  // and the data has been transferred back to the host.
  // Since x_buf and y_buf are only accessed with 'read_only' buffers,
  // the runtime will not copy them back from the device.

  // validate the results
  bool passed = std::equal(bins_h.begin(), bins_h.end(), bins_ref_h.begin());

  if (passed) {
    printf("PASSED\n");
  } else {
    printf("FAILED\n");
  }

  return passed;
}

Makefile

# Target FPGA board, passed to the hardware compiles through -Xstarget.
BOARD=intel_a10gx_pac:pac_a10

# None of the rules below create a file named after the target itself, so
# declare them phony; otherwise a stray file called e.g. "clean" or "report"
# in this directory would make the rule appear up to date and skip it.
.PHONY: fpga_emu report fpga clean

# Emulator build: defines FPGA_EMULATOR so main.cpp selects the emulator device.
fpga_emu: main.cpp
	icpx -fsycl -fintelfpga -DFPGA_EMULATOR main.cpp -o main.fpga_emu

# Early-link hardware build for $(BOARD); writes main_report.a.
report: main.cpp
	icpx -fsycl -fintelfpga -Xshardware -Xstarget=$(BOARD) -fsycl-link=early main.cpp -o main_report.a

# Full hardware build for $(BOARD); -reuse-exe names the previous main.fpga.
fpga: main.cpp
	icpx -fsycl -fintelfpga -Xshardware -Xstarget=$(BOARD) -reuse-exe=main.fpga main.cpp -o main.fpga

# Remove object files, archives and .prj project directories.
clean:
	rm -rf *.o *.a *.prj