/**
 * k-nearest-neighbor search over fixed-width text records.
 *
 * Host code loads records from the files named in a file list, a CUDA kernel
 * computes the Euclidean distance from every record's (lat, lng) to a target
 * position, and the host then selects and prints the closest records.
 */
#include "cuda.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>

#ifdef TIMING
#include <sys/time.h>
#include "timing.h"
struct timeval tv;
struct timeval tv_total_start, tv_total_end;
struct timeval tv_h2d_start, tv_h2d_end;
struct timeval tv_d2h_start, tv_d2h_end;
struct timeval tv_kernel_start, tv_kernel_end;
struct timeval tv_mem_alloc_start, tv_mem_alloc_end;
struct timeval tv_close_start, tv_close_end;
float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0,
      d2h_time = 0, close_time = 0, total_time = 0;
#endif

// Fully parenthesised so the macros expand correctly inside larger
// expressions and with compound arguments.
#define min(a, b) ((a) > (b) ? (b) : (a))
#define ceilDiv(a, b) (((a) + (b) - 1) / (b))
#define print(x) printf(#x ": %lu\n", (unsigned long)(x))
#define DEBUG false

#define DEFAULT_THREADS_PER_BLOCK 256

#define MAX_ARGS 10
#define REC_LENGTH 53      // size of a record in db
#define LATITUDE_POS 28    // character position of the latitude value in each record
#define COORD_FIELD_WIDTH 5 // characters read for each of latitude and longitude
#define OPEN 10000         // initial value of nearest neighbors
#define FILENAME_CAPACITY 100 // size of the filename buffer owned by main()

// Abort with a readable message when a CUDA runtime call fails, instead of
// letting a sticky error silently corrupt every later call.
#define CUDA_CHECK(call)                                                  \
  do {                                                                    \
    cudaError_t cudaCheckErr_ = (call);                                   \
    if (cudaCheckErr_ != cudaSuccess) {                                   \
      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,       \
              cudaGetErrorString(cudaCheckErr_));                         \
      exit(EXIT_FAILURE);                                                 \
    }                                                                     \
  } while (0)

typedef struct latLong {
  float lat;
  float lng;
} LatLong;

typedef struct record {
  char recString[REC_LENGTH];
  float distance;
} Record;

int loadData(char *filename, std::vector<Record> &records,
             std::vector<LatLong> &locations);
void findLowest(std::vector<Record> &records, float *distances, int numRecords,
                int topN);
void printUsage();
int parseCommandline(int argc, char *argv[], char *filename, int *r,
                     float *lat, float *lng, int *q, int *t, int *p, int *d);

/**
 * Kernel
 * Executed on GPU
 * Calculates the Euclidean distance from each record in the database to the
 * target position.
 *
 * Launch layout: 1D thread blocks on a 2D grid; each thread handles the
 * element at blockDim.x * (gridDim.x * blockIdx.y + blockIdx.x) + threadIdx.x.
 * Threads whose index is >= numRecords do nothing, so the grid may be larger
 * than the data.
 *
 * d_locations  device array of numRecords positions (read)
 * d_distances  device array of numRecords floats (written)
 * numRecords   number of valid elements in both arrays
 * lat, lng     target position
 */
__global__ void euclid(LatLong *d_locations, float *d_distances,
                       int numRecords, float lat, float lng) {
  int globalId =
      blockDim.x * (gridDim.x * blockIdx.y + blockIdx.x) + threadIdx.x;
  if (globalId < numRecords) {
    // Only touch the arrays inside the bounds guard.
    const LatLong loc = d_locations[globalId];
    const float dLat = lat - loc.lat;
    const float dLng = lng - loc.lng;
    // sqrtf keeps the computation in single precision.
    d_distances[globalId] = sqrtf(dLat * dLat + dLng * dLng);
  }
}

/**
 * This program finds the k-nearest neighbors
 **/
int main(int argc, char *argv[]) {
  int i = 0;
  // Default target is (0, 0), as stated in the usage text.
  float lat = 0.0f, lng = 0.0f;
  int quiet = 0, timing = 0, platform = 0, device = 0;

  std::vector<Record> records;
  std::vector<LatLong> locations;
  char filename[FILENAME_CAPACITY];
  int resultsCount = 10;

  // parse command line
  if (parseCommandline(argc, argv, filename, &resultsCount, &lat, &lng, &quiet,
                       &timing, &platform, &device)) {
    printUsage();
    return 0;
  }

  // Select the device only after argument parsing so that usage/help output
  // does not depend on a working GPU.
  CUDA_CHECK(cudaSetDevice(0));

  printf("before all\n");
  int numRecords = loadData(filename, records, locations);
  if (resultsCount > numRecords)
    resultsCount = numRecords;
  printf("after before all\n");

  // With no records the grid computation below would divide by zero and
  // &locations[0] would be invalid, so stop here.
  if (numRecords <= 0) {
    fprintf(stderr, "no records loaded\n");
    return 1;
  }

  // Pointers to host memory
  float *distances = NULL;
  // Pointers to device memory
  LatLong *d_locations = NULL;
  float *d_distances = NULL;

  // Scaling calculations - added by Sam Kauffman
  cudaDeviceProp deviceProp;
  CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, 0));
  CUDA_CHECK(cudaDeviceSynchronize());
  unsigned long maxGridX = deviceProp.maxGridSize[0];
  unsigned long threadsPerBlock = DEFAULT_THREADS_PER_BLOCK;
  unsigned long blocks =
      ceilDiv(numRecords, threadsPerBlock); // extra threads will do nothing
  // Fold the block count into a 2D grid when it exceeds the X-dimension limit.
  unsigned long gridY = ceilDiv(blocks, maxGridX);
  unsigned long gridX = ceilDiv(blocks, gridY);
  // There will be no more than (gridY - 1) extra blocks
  dim3 grid(gridX, gridY);

  /**
   * Allocate memory on host and device
   */
  distances = (float *)malloc(sizeof(float) * numRecords);
  if (!distances) {
    fprintf(stderr, "out of host memory\n");
    return 1;
  }
  CUDA_CHECK(cudaMalloc((void **)&d_locations, sizeof(LatLong) * numRecords));
  CUDA_CHECK(cudaMalloc((void **)&d_distances, sizeof(float) * numRecords));

  /**
   * Transfer data from host to device
   */
  CUDA_CHECK(cudaMemcpy(d_locations, &locations[0],
                        sizeof(LatLong) * numRecords, cudaMemcpyHostToDevice));

  /**
   * Execute kernel
   */
  printf("before call\n");
  euclid<<<grid, threadsPerBlock>>>(d_locations, d_distances, numRecords, lat,
                                    lng);
  CUDA_CHECK(cudaGetLastError());      // launch-configuration errors
  CUDA_CHECK(cudaDeviceSynchronize()); // errors raised while the kernel ran
  printf("after call\n");

  // Copy data from device memory to host memory
  CUDA_CHECK(cudaMemcpy(distances, d_distances, sizeof(float) * numRecords,
                        cudaMemcpyDeviceToHost));

  // find the resultsCount least distances
  printf("before find\n");
  findLowest(records, distances, numRecords, resultsCount);
  printf("after find\n");

  // print out results
  if (!quiet)
    for (i = 0; i < resultsCount; i++) {
      printf("%s --> Distance=%f\n", records[i].recString,
             records[i].distance);
    }

  free(distances);
  // Free memory
  CUDA_CHECK(cudaFree(d_locations));
  CUDA_CHECK(cudaFree(d_distances));

#ifdef TIMING
  printf("Exec: %f\n", kernel_time);
#endif
  return 0;
}

/**
 * Parses COORD_FIELD_WIDTH characters of recString starting at offset start
 * as a decimal number.
 */
static float parseCoordinate(const char *recString, int start) {
  char substr[COORD_FIELD_WIDTH + 1];
  memcpy(substr, recString + start, COORD_FIELD_WIDTH);
  substr[COORD_FIELD_WIDTH] = '\0';
  return (float)atof(substr);
}

/**
 * Reads every record from every database file named in the file list.
 *
 * filename   path of a text file containing whitespace-separated db file names
 * records    receives one Record per line read (distance zero-initialised)
 * locations  receives the latitude/longitude parsed from the same line, at
 *            the same index as the matching entry in records
 *
 * Returns the number of records appended. Exits the process if the file list
 * or any db file cannot be opened or read.
 */
int loadData(char *filename, std::vector<Record> &records,
             std::vector<LatLong> &locations) {
  FILE *flist, *fp;
  char dbname[64];
  int recNum = 0;

  /**Main processing **/

  flist = fopen(filename, "r");
  if (!flist) {
    fprintf(stderr, "error opening filelist\n");
    exit(1);
  }
  while (!feof(flist)) {
    /**
     * Read in all records of length REC_LENGTH
     * If this is the last file in the filelist, then done
     * else open next file to be read next iteration
     */
    // %63s bounds the read to dbname's capacity; the trailing "\n" skips
    // whitespace so feof() becomes true after the last name.
    if (fscanf(flist, "%63s\n", dbname) != 1) {
      fprintf(stderr, "error reading filelist\n");
      exit(0);
    }
    fp = fopen(dbname, "r");
    if (!fp) {
      printf("error opening a db\n");
      exit(1);
    }
    // read each record
    while (!feof(fp)) {
      Record record;
      LatLong latLong;
      // Zero the record so a short line never exposes uninitialised bytes.
      memset(&record, 0, sizeof(record));
      if (!fgets(record.recString, 49, fp))
        break;
      fgetc(fp); // newline
      if (feof(fp))
        break;

      // parse for lat and long
      latLong.lat = parseCoordinate(record.recString, LATITUDE_POS);
      latLong.lng =
          parseCoordinate(record.recString, LATITUDE_POS + COORD_FIELD_WIDTH);

      locations.push_back(latLong);
      records.push_back(record);
      recNum++;
    }
    fclose(fp);
  }
  fclose(flist);
  return recNum;
}

/**
 * Partial selection sort: moves the topN records with the smallest distances
 * to the front of records (in ascending order) and stores each one's distance
 * in its distance field.
 *
 * records     records to reorder in place
 * distances   distances[j] belongs to records[j]; reordered in step with it
 * numRecords  number of valid entries in both arrays
 * topN        how many nearest records to select (clamped to numRecords)
 */
void findLowest(std::vector<Record> &records, float *distances, int numRecords,
                int topN) {
  if (topN > numRecords)
    topN = numRecords;

  for (int i = 0; i < topN; i++) {
    int minLoc = i;
    for (int j = i + 1; j < numRecords; j++) {
      if (distances[j] < distances[minLoc])
        minLoc = j;
    }
    // swap locations and distances; the temporary must hold a copy of the
    // record (not a pointer to it) or the overwritten element is lost
    Record tempRec = records[i];
    records[i] = records[minLoc];
    records[minLoc] = tempRec;

    float tempDist = distances[i];
    distances[i] = distances[minLoc];
    distances[minLoc] = tempDist;

    // add distance to the min we just found
    records[i].distance = distances[i];
  }
}

/**
 * Parses the command line.
 *
 * argv[1] is copied into filename (a buffer of FILENAME_CAPACITY bytes,
 * always NUL-terminated). Recognised flags: -r N, -lat F, -lng F, -h/--help,
 * -q, -t, -p N, -d N. Outputs are only written for flags that are present,
 * so callers must pre-initialise them with defaults.
 *
 * Returns 0 on success and 1 when usage should be shown (too few arguments,
 * help requested, a flag is missing its value, or only one of platform/device
 * is non-negative).
 */
int parseCommandline(int argc, char *argv[], char *filename, int *r,
                     float *lat, float *lng, int *q, int *t, int *p, int *d) {
  if (argc < 2)
    return 1; // error
  strncpy(filename, argv[1], FILENAME_CAPACITY - 1);
  filename[FILENAME_CAPACITY - 1] = '\0'; // strncpy does not terminate on truncation

  for (int i = 1; i < argc; i++) {
    if (argv[i][0] != '-')
      continue;
    const char flag = argv[i][1];
    switch (flag) {
    case 'r': // number of results
      if (++i >= argc)
        return 1; // missing value
      *r = atoi(argv[i]);
      break;
    case 'l': // lat or lng
      if (i + 1 >= argc)
        return 1; // missing value
      if (argv[i][2] == 'a') { // lat
        *lat = (float)atof(argv[i + 1]);
      } else { // lng
        *lng = (float)atof(argv[i + 1]);
      }
      i++;
      break;
    case 'h': // help
      return 1;
    case '-': // long options; only --help is documented
      if (strcmp(argv[i], "--help") == 0)
        return 1;
      break;
    case 'q': // quiet
      *q = 1;
      break;
    case 't': // timing
      *t = 1;
      break;
    case 'p': // platform
      if (++i >= argc)
        return 1; // missing value
      *p = atoi(argv[i]);
      break;
    case 'd': // device
      if (++i >= argc)
        return 1; // missing value
      *d = atoi(argv[i]);
      break;
    }
  }
  if ((*d >= 0 && *p < 0) ||
      (*p >= 0 &&
       *d < 0)) // both p and d must be specified if either are specified
    return 1;
  return 0;
}

/**
 * Prints the command-line help text to stdout.
 */
void printUsage() {
  printf("Nearest Neighbor Usage\n");
  printf("\n");
  printf("nearestNeighbor [filename] -r [int] -lat [float] -lng [float] [-hqt] "
         "[-p [int] -d [int]]\n");
  printf("\n");
  printf("example:\n");
  printf("$ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90\n");
  printf("\n");
  printf("filename the filename that lists the data input files\n");
  printf("-r [int] the number of records to return (default: 10)\n");
  printf("-lat [float] the latitude for nearest neighbors (default: 0)\n");
  printf("-lng [float] the longitude for nearest neighbors (default: 0)\n");
  printf("\n");
  printf("-h, --help Display the help file\n");
  printf("-q Quiet mode. Suppress all text output.\n");
  printf("-t Print timing information.\n");
  printf("\n");
  printf("-p [int] Choose the platform (must choose both platform and "
         "device)\n");
  printf("-d [int] Choose the device (must choose both platform and "
         "device)\n");
  printf("\n");
  printf("\n");
  printf("Notes: 1. The filename is required as the first parameter.\n");
  printf(" 2. If you declare either the device or the platform,\n");
  printf(" you must declare both.\n\n");
}