CuPBoP/examples/hotspot3D/3D.cu

206 lines
5.6 KiB
Plaintext

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
/* NOTE(review): BLOCK_SIZE is not referenced in the visible part of this
 * file -- confirm whether opt1.cu needs it. */
#define BLOCK_SIZE 16
/* size of the line buffer used by readinput() and writeoutput() */
#define STR_SIZE 256
/* NOTE(review): block_x_/block_y_/block_z_ are not referenced in the visible
 * part of this file; presumably the thread-block dimensions used by opt1.cu
 * -- confirm. */
#define block_x_ 128
#define block_y_ 2
#define block_z_ 1
/* numerator of max_slope in main(); presumably the maximum power density
 * -- TODO confirm meaning and units */
#define MAX_PD (3.0e6)
/* required precision in degrees */
#define PRECISION 0.001
/* enters the capacitance (Cap) and max_slope computations in main() */
#define SPEC_HEAT_SI 1.75e6
/* enters the Rx, Ry and Rz computations in main() */
#define K_SI 100
/* capacitance fitting factor */
#define FACTOR_CHIP 0.5
/* Textually included after the macros above, so they are visible inside it.
 * main() calls hotspot_opt1(), which is not defined in this file; presumably
 * it comes from opt1.cu -- confirm. */
#include "opt1.cu"
/* chip parameters (main() derives the cell sizes dx, dy and dz from them) */
float t_chip = 0.0005;
float chip_height = 0.016;
float chip_width = 0.016;
/* ambient temperature, assuming no package at all */
float amb_temp = 80.0;
/*
 * Report an unrecoverable error and terminate the program.
 *
 * Prints "Error: <s>" followed by a newline on stderr, then exits with
 * status 1 (the same status usage() uses). Terminating here keeps callers
 * from carrying on with a NULL FILE* or with stale data after a failure.
 *
 * s: message text, without a trailing newline.
 */
void fatal(const char *s) {
  fprintf(stderr, "Error: %s\n", s);
  exit(1);
}
/*
 * Fill a 3D grid from a text file holding one floating-point value per line.
 *
 * Lines are consumed with the layer index varying fastest, then the column,
 * then the row. The value for (row i, column j, layer k) is stored at
 * vect[i * grid_cols + j + k * grid_rows * grid_cols].
 *
 * vect:      destination buffer with at least grid_rows * grid_cols * layers
 *            elements.
 * grid_rows: number of rows.
 * grid_cols: number of columns.
 * layers:    number of layers.
 * file:      path of the input file.
 *
 * Any failure (file cannot be opened, too few lines, read error, or a line
 * that does not start with a number) is reported through fatal() and the
 * program exits with status 1, so a partially filled grid is never used.
 */
void readinput(float *vect, int grid_rows, int grid_cols, int layers,
               char *file) {
  FILE *fp = fopen(file, "r");
  if (fp == NULL) {
    fatal("The file was not opened");
    exit(1); /* never continue with a NULL stream */
  }

  char str[STR_SIZE];
  float val;
  for (int i = 0; i < grid_rows; i++)
    for (int j = 0; j < grid_cols; j++)
      for (int k = 0; k < layers; k++) {
        if (fgets(str, STR_SIZE, fp) == NULL) {
          /* Only a failed fgets tells us the data ran out; end-of-file may
           * legitimately be set after a successful read of a last line that
           * has no trailing newline. */
          fatal(feof(fp) ? "not enough lines in file" : "Error reading file");
          fclose(fp);
          exit(1);
        }
        if (sscanf(str, "%f", &val) != 1) {
          fatal("invalid file format");
          fclose(fp);
          exit(1);
        }
        vect[i * grid_cols + j + k * grid_rows * grid_cols] = val;
      }
  fclose(fp);
}
/*
 * Write a 3D grid to a text file, one "<index>\t<value>" line per cell.
 *
 * Cells are emitted with the layer index varying fastest, then the column,
 * then the row; <index> is a running counter starting at 0. The value for
 * (row i, column j, layer k) is read from
 * vect[i * grid_cols + j + k * grid_rows * grid_cols].
 *
 * vect:      source buffer with at least grid_rows * grid_cols * layers
 *            elements.
 * grid_rows: number of rows.
 * grid_cols: number of columns.
 * layers:    number of layers.
 * file:      path of the output file (created or truncated).
 *
 * If the file cannot be opened, a message is printed and nothing is written.
 */
void writeoutput(float *vect, int grid_rows, int grid_cols, int layers,
                 char *file) {
  FILE *fp = fopen(file, "w");
  if (fp == NULL) {
    printf("The file was not opened\n");
    return; /* writing to or closing a NULL stream would crash */
  }

  int index = 0;
  for (int i = 0; i < grid_rows; i++)
    for (int j = 0; j < grid_cols; j++)
      for (int k = 0; k < layers; k++) {
        fprintf(fp, "%d\t%g\n", index,
                vect[i * grid_cols + j + k * grid_rows * grid_cols]);
        index++;
      }
  fclose(fp);
}
/*
 * Host-side reference stencil over an nx * ny * nz grid.
 *
 * Each pass writes, for every cell, a weighted sum of the cell itself, its
 * six axis neighbours, the cell's power value and the ambient temperature.
 * A neighbour that would fall outside the grid is replaced by the cell
 * itself. Cells are addressed as x + y * nx + z * nx * ny.
 *
 * pIn:     per-cell power values (read only).
 * tIn:     temperatures read by the first pass.
 * tOut:    temperatures written by the first pass.
 * nx, ny, nz: grid extents.
 * Cap, Rx, Ry, Rz, dt: coefficients from which the stencil weights are
 *          derived (dt / Cap divided by each resistance).
 * numiter: requested number of passes.
 *
 * The two temperature buffers trade roles after every pass, and that swap
 * only affects the local pointers. Consequently the most recent result is in
 * the caller's tOut after an odd number of passes and in the caller's tIn
 * after an even number; both buffers are overwritten once two or more passes
 * run. At least one pass is always executed, even when numiter <= 0.
 */
void computeTempCPU(float *pIn, float *tIn, float *tOut, int nx, int ny, int nz,
                    float Cap, float Rx, float Ry, float Rz, float dt,
                    int numiter) {
  const float stepDivCap = dt / Cap;
  const float cw = stepDivCap / Rx;
  const float ce = cw;
  const float cs = stepDivCap / Ry;
  const float cn = cs;
  const float cb = stepDivCap / Rz;
  const float ct = cb;
  const float cc = 1.0 - (2.0 * ce + 2.0 * cn + 3.0 * ct);

  const int plane = nx * ny; /* cells per z-layer */
  int pass = 0;

  for (;;) {
    for (int z = 0; z < nz; z++) {
      for (int y = 0; y < ny; y++) {
        for (int x = 0; x < nx; x++) {
          const int c = x + y * nx + z * plane;

          /* Neighbours; at a grid face the cell stands in for the missing
           * neighbour. */
          const int w = (x > 0) ? c - 1 : c;
          const int e = (x < nx - 1) ? c + 1 : c;
          const int n = (y > 0) ? c - nx : c;
          const int s = (y < ny - 1) ? c + nx : c;
          const int b = (z > 0) ? c - plane : c;
          const int t = (z < nz - 1) ? c + plane : c;

          tOut[c] = tIn[c] * cc + tIn[n] * cn + tIn[s] * cs + tIn[e] * ce +
                    tIn[w] * cw + tIn[t] * ct + tIn[b] * cb +
                    (dt / Cap) * pIn[c] + ct * amb_temp;
        }
      }
    }

    /* Ping-pong: this pass's output becomes the next pass's input. */
    float *previous = tIn;
    tIn = tOut;
    tOut = previous;

    pass++;
    if (!(pass < numiter))
      break;
  }
}
/*
 * Root-mean-square difference between two float arrays.
 *
 * arr1, arr2: arrays holding at least len elements each.
 * len:        number of elements to compare.
 *
 * Returns sqrt(sum((arr1[i] - arr2[i])^2) / len). For len <= 0 there is
 * nothing to compare and 0 is returned; without this guard an empty input
 * would divide zero by zero and yield NaN.
 */
float accuracy(float *arr1, float *arr2, int len) {
  if (len <= 0)
    return 0.0f;

  float err = 0.0;
  for (int i = 0; i < len; i++) {
    const float diff = arr1[i] - arr2[i];
    err += diff * diff;
  }
  return (float)sqrt(err / len);
}
/*
 * Print the command-line help on stderr and exit with status 1.
 *
 * argc: unused; kept so existing callers need no change.
 * argv: argv[0] is printed as the program name.
 *
 * The placeholder names in the per-argument list match the synopsis line
 * (<iterations>, <outputFile>).
 */
void usage(int argc, char **argv) {
  (void)argc;
  fprintf(stderr,
          "Usage: %s <rows/cols> <layers> <iterations> <powerFile> <tempFile> "
          "<outputFile>\n"
          "\t<rows/cols> - number of rows/cols in the grid (positive integer)\n"
          "\t<layers> - number of layers in the grid (positive integer)\n"
          "\t<iterations> - number of iterations\n"
          "\t<powerFile> - name of the file containing the initial "
          "power values of each cell\n"
          "\t<tempFile> - name of the file containing the initial "
          "temperature values of each cell\n"
          "\t<outputFile> - output file\n",
          argv[0]);
  exit(1);
}
/*
 * Program entry point.
 *
 * Expects six arguments: <rows/cols> <layers> <iterations> <powerFile>
 * <tempFile> <outputFile>. It reads the power and temperature grids, runs
 * hotspot_opt1() and the host reference computeTempCPU() on the same inputs,
 * prints the RMS difference between their output buffers and writes the
 * hotspot_opt1() output buffer to <outputFile>.
 *
 * Returns 0 on success and 1 when a buffer cannot be allocated. A wrong
 * argument count or a non-positive grid dimension ends the program through
 * usage().
 */
int main(int argc, char **argv) {
  cudaSetDevice(0);
  if (argc != 7) {
    usage(argc, argv);
  }

  /* The grid is square in the row/column plane: one argument sets both. */
  int numCols = atoi(argv[1]);
  int numRows = numCols;
  int layers = atoi(argv[2]);
  int iterations = atoi(argv[3]);
  char *pfile = argv[4];
  char *tfile = argv[5];
  char *ofile = argv[6];

  /* Non-positive dimensions would divide by zero below and produce invalid
   * allocation sizes. */
  if (numCols <= 0 || layers <= 0) {
    fprintf(stderr,
            "Error: <rows/cols> and <layers> must be positive integers\n");
    usage(argc, argv);
  }

  /* calculating parameters */
  float dx = chip_height / numRows;
  float dy = chip_width / numCols;
  float dz = t_chip / layers;
  float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * dx * dy;
  float Rx = dy / (2.0 * K_SI * t_chip * dx);
  float Ry = dx / (2.0 * K_SI * t_chip * dy);
  float Rz = dz / (K_SI * dx * dy);
  float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI);
  float dt = PRECISION / max_slope;

  int size = numCols * numRows * layers;
  float *powerIn = (float *)calloc(size, sizeof(float));
  float *tempCopy = (float *)malloc((size_t)size * sizeof(float));
  float *tempIn = (float *)calloc(size, sizeof(float));
  float *tempOut = (float *)calloc(size, sizeof(float));
  float *answer = (float *)calloc(size, sizeof(float));
  if (powerIn == NULL || tempCopy == NULL || tempIn == NULL ||
      tempOut == NULL || answer == NULL) {
    fprintf(stderr, "Error: unable to allocate buffers for %d grid cells\n",
            size);
    free(powerIn);
    free(tempCopy);
    free(tempIn);
    free(tempOut);
    free(answer);
    return 1;
  }

  readinput(powerIn, numRows, numCols, layers, pfile);
  readinput(tempIn, numRows, numCols, layers, tfile);
  /* Private copy of the initial temperatures for the host reference run. */
  memcpy(tempCopy, tempIn, (size_t)size * sizeof(float));

  hotspot_opt1(powerIn, tempIn, tempOut, numCols, numRows, layers, Cap, Rx, Ry,
               Rz, dt, iterations);
  computeTempCPU(powerIn, tempCopy, answer, numCols, numRows, layers, Cap, Rx,
                 Ry, Rz, dt, iterations);

  float acc = accuracy(tempOut, answer, size);
  printf("Accuracy: %e\n", acc);
  writeoutput(tempOut, numRows, numCols, layers, ofile);

  free(tempIn);
  free(tempOut);
  free(powerIn);
  free(tempCopy);
  free(answer);
  return 0;
}