Building our first C++ CUDA library


So the first thing you do when you get CUDA ready is try your hand at the sample code and Nsight (the Eclipse of CUDA). This goes smoothly, but as soon as you start writing C++ code for developing libraries, you can hit all kinds of issues. This post has fixes for those crazy issues that have no help even on Stack Overflow.

Reference

#include "common.h"
#include <cuda_runtime.h>
#include <sys/time.h>
#ifndef _TIMER_H
#define _TIMER_H
/**
 * Interval timer that can measure either on the host or on the device.
 *
 * The processor is chosen once, at construction time. Call StartTimer(),
 * then StopTimer(), then GetElapsedTime() to read the measured interval.
 */
class Timer {
public:
    /// Unit in which GetElapsedTime() reports the measured interval.
    enum class TIMER_FORMAT {
        MILLISECONDS,
        MICROSECONDS,
        NANOSECONDS,
        SECONDS
    };

    /// Processor whose clock the timer uses.
    enum class PROCESSOR_CONFIG {
        HOST,
        DEVICE
    };

    /// Creates a timer bound to the given processor.
    __host__ Timer(PROCESSOR_CONFIG config);

    __host__ ~Timer();

    /// Marks the beginning of the interval to measure.
    __host__ void StartTimer();

    /// Marks the end of the interval to measure.
    __host__ void StopTimer();

    /// Returns the interval between StartTimer() and StopTimer() in `format`.
    __host__ double GetElapsedTime(TIMER_FORMAT format);

protected:
    // CUDA events recorded by StartTimer()/StopTimer() in DEVICE mode.
    cudaEvent_t m_eventStart;
    cudaEvent_t m_eventStop;

    // Processor selected at construction.
    PROCESSOR_CONFIG m_config;

    // Host-side timestamps (nanoseconds) captured in HOST mode.
    double m_startTimeInNS;
    double m_stopTimeInNS;
};
#endif // _TIMER_H

Timer.cu

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "Timer.cuh"

/**
 * Creates a timer for the requested processor.
 *
 * Every member is given a defined value regardless of `config`, so a timer
 * never carries indeterminate timestamps or event handles. In DEVICE mode the
 * start/stop CUDA events are then created; in HOST mode no CUDA resources are
 * allocated.
 *
 * @param config  HOST to time with the CPU clock, DEVICE to time with CUDA events.
 */
Timer::Timer(PROCESSOR_CONFIG config)
    : m_eventStart(nullptr),
      m_eventStop(nullptr),
      m_config(config),
      m_startTimeInNS(0),
      m_stopTimeInNS(0) {
    if (m_config == Timer::PROCESSOR_CONFIG::DEVICE) {
        CHECK_API_FOR_ERROR(cudaEventCreate(&m_eventStart));
        CHECK_API_FOR_ERROR(cudaEventCreate(&m_eventStop));
    } else {
#ifdef __COMPILE_FOR_WINDOWS__
        // NOTE(review): m_performanceFrequency is not declared in the Timer
        // class shown alongside this file - confirm before enabling this branch.
        LARGE_INTEGER lpFrequency;
        if (QueryPerformanceFrequency(&lpFrequency) == 0) {
            printf("Could not query performance frequency on CPU");
            m_performanceFrequency = 0;
        }
        else
            m_performanceFrequency = lpFrequency.QuadPart;
#endif
    }
}

/**
 * Releases the CUDA events owned by a DEVICE timer.
 *
 * Events are only created in DEVICE mode, so HOST timers have nothing to
 * release. Without this, every DEVICE timer leaked two CUDA events.
 *
 * NOTE(review): Timer has no user-defined copy operations; copying a DEVICE
 * timer would make two objects destroy the same events - avoid copying.
 */
Timer::~Timer() {
    if (m_config == Timer::PROCESSOR_CONFIG::DEVICE) {
        CHECK_API_FOR_ERROR(cudaEventDestroy(m_eventStart));
        CHECK_API_FOR_ERROR(cudaEventDestroy(m_eventStop));
    }
}

/**
 * Marks the beginning of the interval to measure.
 *
 * DEVICE mode records the start event. HOST mode samples
 * CLOCK_PROCESS_CPUTIME_ID and stores the full timestamp in nanoseconds.
 */
void Timer::StartTimer() {
    if (m_config == Timer::PROCESSOR_CONFIG::DEVICE) {
        CHECK_API_FOR_ERROR(cudaEventRecord(m_eventStart));
    } else {
        timespec ts = {};
        clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
        // tv_nsec alone is only the sub-second part (0..999,999,999); the
        // whole seconds in tv_sec must be included or intervals that cross a
        // second boundary come out wrong (even negative).
        m_startTimeInNS = static_cast<double>(ts.tv_sec) * 1.0e9
                        + static_cast<double>(ts.tv_nsec);
    }
}

/**
 * Marks the end of the interval to measure.
 *
 * DEVICE mode records the stop event and waits for it to complete, so the
 * elapsed time can be queried afterwards. HOST mode samples
 * CLOCK_PROCESS_CPUTIME_ID and stores the full timestamp in nanoseconds.
 */
void Timer::StopTimer() {
    if (m_config == Timer::PROCESSOR_CONFIG::DEVICE) {
        CHECK_API_FOR_ERROR(cudaEventRecord(m_eventStop));
        CHECK_API_FOR_ERROR(cudaEventSynchronize(m_eventStop));
    } else {
        timespec ts = {};
        clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
        // tv_nsec alone is only the sub-second part (0..999,999,999); the
        // whole seconds in tv_sec must be included or intervals that cross a
        // second boundary come out wrong (even negative).
        m_stopTimeInNS = static_cast<double>(ts.tv_sec) * 1.0e9
                       + static_cast<double>(ts.tv_nsec);
    }
}

/**
 * Returns the interval between StartTimer() and StopTimer().
 *
 * The interval is first normalised to nanoseconds (CUDA reports event
 * intervals in milliseconds; the host path stores nanosecond timestamps) and
 * then converted to the requested unit.
 *
 * @param format  Unit of the returned value.
 * @return Elapsed time in `format`, or 0 for an unrecognised format.
 */
double Timer::GetElapsedTime(TIMER_FORMAT format) {
    double elapsedTimeInNanoseconds = 0;

    if (m_config == Timer::PROCESSOR_CONFIG::DEVICE) {
        float elapsedTimeInMilliseconds = 0;
        CHECK_API_FOR_ERROR(
                cudaEventElapsedTime(&elapsedTimeInMilliseconds, m_eventStart,
                        m_eventStop));
        elapsedTimeInNanoseconds = elapsedTimeInMilliseconds * 1000.0 * 1000.0;
    } else {
        elapsedTimeInNanoseconds = m_stopTimeInNS - m_startTimeInNS;
    }

    switch (format) {
    case TIMER_FORMAT::MILLISECONDS:
        return elapsedTimeInNanoseconds / 1.0e6;

    case TIMER_FORMAT::MICROSECONDS:
        return elapsedTimeInNanoseconds / 1.0e3;

    case TIMER_FORMAT::NANOSECONDS:
        return elapsedTimeInNanoseconds;

    case TIMER_FORMAT::SECONDS:
        // Nanoseconds -> seconds is a division by 1e9 (previously multiplied).
        return elapsedTimeInNanoseconds / 1.0e9;

    default:
        return 0;
    }
}

// Placeholder entry point: does nothing and exits successfully.
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;
    return 0;
}

  • Syntax Error : For some reason I got a “Syntax Error” on the constructor and destructor lines of my CPP code. To fix this, rename your .cpp as .cu. And rename your .h as .cuh.
  • My enum PROCESSOR_CONFIG would not resolve at Timer::PROCESSOR_CONFIG::DEVICE. I had a similar issue to this. To fix this I had to compile with the C++11 flag (-std=c++11), since scoped enums (enum class) are a C++11 feature. I used this to fix it. Refer to my answer (Siddharth).
  • Finally I had to add a main (I need to get rid of this expectation soon).
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s