CuPBoP/examples/nn/nn_cuda.cu

329 lines
9.2 KiB
Plaintext

#include "cuda.h"
#include <float.h>
#include <stdio.h>
#include <sys/time.h>
#include <vector>
#ifdef TIMING
#include "timing.h"
struct timeval tv;
struct timeval tv_total_start, tv_total_end;
struct timeval tv_h2d_start, tv_h2d_end;
struct timeval tv_d2h_start, tv_d2h_end;
struct timeval tv_kernel_start, tv_kernel_end;
struct timeval tv_mem_alloc_start, tv_mem_alloc_end;
struct timeval tv_close_start, tv_close_end;
float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0,
d2h_time = 0, close_time = 0, total_time = 0;
#endif
#define min(a, b) a > b ? b : a
#define ceilDiv(a, b) (a + b - 1) / b
#define print(x) printf(#x ": %lu\n", (unsigned long)x)
#define DEBUG false
#define DEFAULT_THREADS_PER_BLOCK 256
#define MAX_ARGS 10
#define REC_LENGTH 53 // size of a record in db
#define LATITUDE_POS \
28 // character position of the latitude value in each record
#define OPEN 10000 // initial value of nearest neighbors
typedef struct latLong {
float lat;
float lng;
} LatLong;
typedef struct record {
char recString[REC_LENGTH];
float distance;
} Record;
int loadData(char *filename, std::vector<Record> &records,
std::vector<LatLong> &locations);
void findLowest(std::vector<Record> &records, float *distances, int numRecords,
int topN);
void printUsage();
int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat,
float *lng, int *q, int *t, int *p, int *d);
/**
* Kernel
* Executed on GPU
* Calculates the Euclidean distance from each record in the database to the
* target position
*/
__global__ void euclid(LatLong *d_locations, float *d_distances, int numRecords,
float lat, float lng) {
// int globalId = gridDim.x * blockDim.x * blockIdx.y + blockDim.x *
// blockIdx.x + threadIdx.x;
int globalId = blockDim.x * (gridDim.x * blockIdx.y + blockIdx.x) +
threadIdx.x; // more efficient
LatLong *latLong = d_locations + globalId;
if (globalId < numRecords) {
float *dist = d_distances + globalId;
*dist = (float)sqrt((lat - latLong->lat) * (lat - latLong->lat) +
(lng - latLong->lng) * (lng - latLong->lng));
}
}
/**
* This program finds the k-nearest neighbors
**/
int main(int argc, char *argv[]) {
cudaSetDevice(0);
int i = 0;
float lat, lng;
int quiet = 0, timing = 0, platform = 0, device = 0;
std::vector<Record> records;
std::vector<LatLong> locations;
char filename[100];
int resultsCount = 10;
// parse command line
if (parseCommandline(argc, argv, filename, &resultsCount, &lat, &lng, &quiet,
&timing, &platform, &device)) {
printUsage();
return 0;
}
printf("before all\n");
int numRecords = loadData(filename, records, locations);
if (resultsCount > numRecords)
resultsCount = numRecords;
printf("after before all\n");
// Pointers to host memory
float *distances;
// Pointers to device memory
LatLong *d_locations;
float *d_distances;
// Scaling calculations - added by Sam Kauffman
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, 0);
cudaDeviceSynchronize();
unsigned long maxGridX = deviceProp.maxGridSize[0];
unsigned long threadsPerBlock = 256;
size_t totalDeviceMemory;
size_t freeDeviceMemory;
unsigned long blocks =
ceilDiv(numRecords, threadsPerBlock); // extra threads will do nothing
unsigned long gridY = ceilDiv(blocks, maxGridX);
unsigned long gridX = ceilDiv(blocks, gridY);
// There will be no more than (gridY - 1) extra blocks
dim3 gridDim(gridX, gridY);
/**
* Allocate memory on host and device
*/
distances = (float *)malloc(sizeof(float) * numRecords);
cudaMalloc((void **)&d_locations, sizeof(LatLong) * numRecords);
cudaMalloc((void **)&d_distances, sizeof(float) * numRecords);
/**
* Transfer data from host to device
*/
cudaMemcpy(d_locations, &locations[0], sizeof(LatLong) * numRecords,
cudaMemcpyHostToDevice);
/**
* Execute kernel
*/
printf("before call\n");
euclid<<<gridDim, threadsPerBlock>>>(d_locations, d_distances, numRecords,
lat, lng);
cudaDeviceSynchronize();
printf("after call\n");
// Copy data from device memory to host memory
cudaMemcpy(distances, d_distances, sizeof(float) * numRecords,
cudaMemcpyDeviceToHost);
// find the resultsCount least distances
printf("before find\n");
findLowest(records, distances, numRecords, resultsCount);
printf("after find\n");
// print out results
if (!quiet)
for (i = 0; i < resultsCount; i++) {
printf("%s --> Distance=%f\n", records[i].recString, records[i].distance);
}
free(distances);
// Free memory
cudaFree(d_locations);
cudaFree(d_distances);
#ifdef TIMING
printf("Exec: %f\n", kernel_time);
#endif
}
int loadData(char *filename, std::vector<Record> &records,
std::vector<LatLong> &locations) {
FILE *flist, *fp;
int i = 0;
char dbname[64];
int recNum = 0;
/**Main processing **/
flist = fopen(filename, "r");
while (!feof(flist)) {
/**
* Read in all records of length REC_LENGTH
* If this is the last file in the filelist, then done
* else open next file to be read next iteration
*/
if (fscanf(flist, "%s\n", dbname) != 1) {
fprintf(stderr, "error reading filelist\n");
exit(0);
}
fp = fopen(dbname, "r");
if (!fp) {
printf("error opening a db\n");
exit(1);
}
// read each record
while (!feof(fp)) {
Record record;
LatLong latLong;
fgets(record.recString, 49, fp);
fgetc(fp); // newline
if (feof(fp))
break;
// parse for lat and long
char substr[6];
for (i = 0; i < 5; i++)
substr[i] = *(record.recString + i + 28);
substr[5] = '\0';
latLong.lat = atof(substr);
for (i = 0; i < 5; i++)
substr[i] = *(record.recString + i + 33);
substr[5] = '\0';
latLong.lng = atof(substr);
locations.push_back(latLong);
records.push_back(record);
recNum++;
}
fclose(fp);
}
fclose(flist);
// for(i=0;i<rec_count*REC_LENGTH;i++) printf("%c",sandbox[i]);
return recNum;
}
void findLowest(std::vector<Record> &records, float *distances, int numRecords,
int topN) {
int i, j;
float val;
int minLoc;
Record *tempRec;
float tempDist;
for (i = 0; i < topN; i++) {
minLoc = i;
for (j = i; j < numRecords; j++) {
val = distances[j];
if (val < distances[minLoc])
minLoc = j;
}
// swap locations and distances
tempRec = &records[i];
records[i] = records[minLoc];
records[minLoc] = *tempRec;
tempDist = distances[i];
distances[i] = distances[minLoc];
distances[minLoc] = tempDist;
// add distance to the min we just found
records[i].distance = distances[i];
}
}
int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat,
float *lng, int *q, int *t, int *p, int *d) {
int i;
if (argc < 2)
return 1; // error
strncpy(filename, argv[1], 100);
char flag;
for (i = 1; i < argc; i++) {
if (argv[i][0] == '-') { // flag
flag = argv[i][1];
switch (flag) {
case 'r': // number of results
i++;
*r = atoi(argv[i]);
break;
case 'l': // lat or lng
if (argv[i][2] == 'a') { // lat
*lat = atof(argv[i + 1]);
} else { // lng
*lng = atof(argv[i + 1]);
}
i++;
break;
case 'h': // help
return 1;
case 'q': // quiet
*q = 1;
break;
case 't': // timing
*t = 1;
break;
case 'p': // platform
i++;
*p = atoi(argv[i]);
break;
case 'd': // device
i++;
*d = atoi(argv[i]);
break;
}
}
}
if ((*d >= 0 && *p < 0) ||
(*p >= 0 &&
*d < 0)) // both p and d must be specified if either are specified
return 1;
return 0;
}
void printUsage() {
printf("Nearest Neighbor Usage\n");
printf("\n");
printf("nearestNeighbor [filename] -r [int] -lat [float] -lng [float] [-hqt] "
"[-p [int] -d [int]]\n");
printf("\n");
printf("example:\n");
printf("$ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90\n");
printf("\n");
printf("filename the filename that lists the data input files\n");
printf("-r [int] the number of records to return (default: 10)\n");
printf("-lat [float] the latitude for nearest neighbors (default: 0)\n");
printf("-lng [float] the longitude for nearest neighbors (default: 0)\n");
printf("\n");
printf("-h, --help Display the help file\n");
printf("-q Quiet mode. Suppress all text output.\n");
printf("-t Print timing information.\n");
printf("\n");
printf("-p [int] Choose the platform (must choose both platform and "
"device)\n");
printf("-d [int] Choose the device (must choose both platform and "
"device)\n");
printf("\n");
printf("\n");
printf("Notes: 1. The filename is required as the first parameter.\n");
printf(" 2. If you declare either the device or the platform,\n");
printf(" you must declare both.\n\n");
}