Developer Reference

Migrating OpenCL™ FPGA Designs to SYCL*

ID 767849
Date 5/08/2024
Public

SYCL Sample Code With Explicit Data Movement

main.cpp File

#include <algorithm>
#include <array>
#include <numeric>
#include <vector>

#include <sycl/sycl.hpp>
#include <sycl/ext/intel/fpga_extensions.hpp>

using namespace sycl;

// the number of bins in the histogram is constant
constexpr int kNumBins = 10;

// Forward declare the kernel names in the global scope to reduce name mangling
class histogram;

int main(int argc, char* argv[]) {
  // parse command line args
  uint count = 1000000;
  if (argc > 1) {
    count = atoi(argv[1]);
  }

  // host input and output memory
  std::vector<int> in_h(count);
  std::array<int, kNumBins> bins_h = {0};
  std::array<int, kNumBins> bins_ref_h = {0};

  // generate random input and compute the expected result
  std::generate(in_h.begin(), in_h.end(), [] { return rand() % 100; });
  for (auto& x : in_h) { bins_ref_h[x % kNumBins]++; };

    // the device selector
#ifdef FPGA_EMULATOR
  ext::intel::fpga_emulator_selector selector;
#else
  ext::intel::fpga_selector selector;
#endif

  // create the device queue
  queue q(selector);

  // allocate memory on the device
  int *in_d, *bins_d;
  if ((in_d = malloc_device<int>(count, q)) == nullptr) {
    std::cerr << "ERROR: could not allocate space for 'in_d'\n";
    std::terminate();
  }
  if ((bins_d = malloc_device<int>(kNumBins, q)) == nullptr) {
    std::cerr << "ERROR: could not allocate space for 'bins_d'\n";
    std::terminate();
  }

  try {
    // copy input to device (blocking, using .wait() on the returned event)
    q.memcpy(in_d, in_h.data(), count*sizeof(int)).wait();
    q.memcpy(bins_d, bins_h.data(), kNumBins*sizeof(int)).wait();

    // launch the kernel
    event kernel_event =
      q.single_task<histogram>([=]() [[intel::kernel_args_restrict]] {
        // inform the compiler that the pointer lives on the device
        device_ptr<int> in(in_d);
        device_ptr<int> bins(bins_d);
        
        // store a local copy of the histogram to avoid read-accumulate-writes
        // to global memory
        [[intel::fpga_register]] int bins_local[kNumBins];

        // initialize the local bins
        #pragma unroll
        for (uint i = 0; i < kNumBins; i++) {
          bins_local[i] = 0;
        }
        
        // compute the histogram
        [[intel::initiation_interval(1)]]
        for (uint i = 0; i < count; i++) {
          bins_local[in[i] % kNumBins]++;
        }
        
        // write back the local copy to global memory
        #pragma unroll
        for (uint i = 0; i < kNumBins; i++) {
          bins[i] = bins_local[i];
        }
      });

    // wait for the kernel to finish
    kernel_event.wait();

    // copy the output back from the device
    q.memcpy(bins_h.data(), bins_d, kNumBins*sizeof(int)).wait();

  } catch (exception const& e) {
    std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n";
    std::terminate();
  }

  // validate the results
  bool passed = std::equal(bins_h.begin(), bins_h.end(), bins_ref_h.begin());

  if (passed) {
    printf("PASSED\n");
  } else {
    printf("FAILED\n");
  }

  // free the allocated device memory
  sycl::free(in_d, q);
  sycl::free(bins_d, q);

  return passed;
}
BOARD=intel_a10gx_pac:pac_a10

fpga_emu: main.cpp
	dpcpp -fintelfpga -DFPGA_EMULATOR main.cpp -o main.fpga_emu

report: main.cpp
	dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -fsycl-link=early main.cpp -o main_report.a

fpga: main.cpp
	dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -reuse-exe=main.fpga main.cpp -o main.fpga

clean:
	rm -rf *.o *.a *.prj