summaryrefslogtreecommitdiff
path: root/cuda-global
diff options
context:
space:
mode:
Diffstat (limited to 'cuda-global')
-rwxr-xr-xcuda-global/golbin709816 -> 709816 bytes
-rw-r--r--cuda-global/src/game.cu9
-rw-r--r--cuda-global/src/main.cu56
-rw-r--r--cuda-global/timing-study/output--1000-1000.txt4
-rw-r--r--cuda-global/timing-study/output--1000-1250.txt4
-rw-r--r--cuda-global/timing-study/output--1000-1500.txt4
-rw-r--r--cuda-global/timing-study/output--1000-1750.txt4
-rw-r--r--cuda-global/timing-study/output--1000-2000.txt4
-rw-r--r--cuda-global/timing-study/output--1000-250.txt4
-rw-r--r--cuda-global/timing-study/output--1000-500.txt4
-rw-r--r--cuda-global/timing-study/output--1000-750.txt4
-rw-r--r--cuda-global/timing-study/slurm-3610476.err-notch0810
-rw-r--r--cuda-global/timing-study/slurm-3610476.out-notch0810
-rw-r--r--cuda-global/timing-study/slurm-3611549.err-notch0810
-rw-r--r--cuda-global/timing-study/slurm-3611549.out-notch0810
-rwxr-xr-xcuda-global/timing-study/timing_study.sh18
16 files changed, 89 insertions, 26 deletions
diff --git a/cuda-global/gol b/cuda-global/gol
index 4fb8b81..85af8de 100755
--- a/cuda-global/gol
+++ b/cuda-global/gol
Binary files differ
diff --git a/cuda-global/src/game.cu b/cuda-global/src/game.cu
index b0b5f61..abb5ed9 100644
--- a/cuda-global/src/game.cu
+++ b/cuda-global/src/game.cu
@@ -1,11 +1,12 @@
#include "game.cuh"
+// Count the number of live neighbors a cell has
__device__ int neighbors(struct GAME game, int x, int y) {
int n = 0;
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
- if (!(dx == 0 && dy == 0) && (x+dx) >= 0 && (y+dy) >= 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
+ if (!(dx == 0 && dy == 0) && (x+dx) > 0 && (y+dy) > 0 && (x+dx) < game.width+(game.padding*2) && (y+dy) < game.height+(game.padding*2)) {
if (game.grid[(y+dy) * (game.width+game.padding*2) + (x+dx)]) {
n++;
}
@@ -15,11 +16,14 @@ __device__ int neighbors(struct GAME game, int x, int y) {
return n;
}
+// Compute the next iteration of a board
+// We have to give it the newGrid as a parameter otherwise
+// each block will be computing its own version of the next grid
__global__ void next(struct GAME game, unsigned char* newGrid) {
int idy = blockDim.y * blockIdx.y + threadIdx.y;
int idx = blockDim.x * blockIdx.x + threadIdx.x;
- if (idy <= game.height+game.padding*2 && idx <= game.width+game.padding*2) {
+ if (idy < game.height+game.padding*2 && idx < game.width+game.padding*2) {
int my_neighbors = neighbors(game, idx, idy);
int my_coord = idy * (game.width+game.padding*2) + idx;
newGrid[my_coord] = 0; // It's possible that there are artifacts from the last iteration
@@ -37,6 +41,7 @@ __global__ void next(struct GAME game, unsigned char* newGrid) {
}
}
+// Randomly assign life value to each cell
void randomize(struct GAME* game) {
for (int y = game->padding; y < game->height+game->padding; y++) {
for (int x = game->padding; x < game->width+game->padding; x++) {
diff --git a/cuda-global/src/main.cu b/cuda-global/src/main.cu
index f906b2a..41c2abf 100644
--- a/cuda-global/src/main.cu
+++ b/cuda-global/src/main.cu
@@ -18,7 +18,7 @@
*/
#define BLOCK 32
#define PADDING 10
-#define VERBOSE 1
+//#define VERBOSE 1
#define SEED 100
// gpuErrchk source: https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
@@ -33,14 +33,16 @@ true) {
}
}
+// Do the simulation
void simulate(int argc, char** argv) {
srand(SEED);
- clock_t totalStart = clock();
+ clock_t global_start = clock();
char* filename;
struct GAME game;
game.padding = PADDING;
int iterations, log_each_step;
if (argc == 7) {
+ // Parse the arguments
filename = argv[2];
game.width = atoi(argv[3]);
game.height = atoi(argv[4]);
@@ -60,6 +62,7 @@ void simulate(int argc, char** argv) {
game.grid = (unsigned char*)malloc(size);
memset(game.grid, 0, size);
+ // Choose where to read the initial position from
if (strcmp(filename, "random") == 0) {
randomize(&game);
} else {
@@ -68,43 +71,53 @@ void simulate(int argc, char** argv) {
char iteration_file[1024];
+ // Allocate device memory
unsigned char* grid_d;
unsigned char* newGrid;
gpuErrchk(cudaMalloc(&grid_d, size));
- gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice));
gpuErrchk(cudaMalloc(&newGrid, size));
+ gpuErrchk(cudaMemcpy(grid_d, game.grid, size, cudaMemcpyHostToDevice)); // Copy the initial grid to the device
+ free(game.grid);
+ game.grid = grid_d; // Use the device copy
+ // Host-side buffer that results are copied into from the device
unsigned char* grid_h = (unsigned char*)malloc(size);
unsigned char* temp;
- game.grid = grid_d;
-
- int grid_num = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
- dim3 dim_grid(grid_num, grid_num, 1);
+ // Calculate the grid dimensions (in blocks) for the kernel launch
+ int grid_width = (int)ceil((game.width+(2*game.padding))/(float)BLOCK);
+ int grid_height = (int)ceil((game.height+(2*game.padding))/(float)BLOCK);
+ dim3 dim_grid(grid_width, grid_height, 1);
dim3 dim_block(BLOCK, BLOCK, 1);
- cudaEvent_t startLife, stopLife;
- cudaEventCreate(&startLife);
- cudaEventCreate(&stopLife);
- double timeComputingLife = 0;
- float localTime = 0;
+ // Timing
+ cudaEvent_t start, end;
+ cudaEventCreate(&start);
+ cudaEventCreate(&end);
+ double time_computing_life = 0;
+ float local_time = 0;
for (int i = 0; i <= iterations; i++) {
+ // Iteration 0 will just be the initial grid
if (i > 0) {
- cudaEventRecord(startLife);
+ cudaEventRecord(start);
+ // Compute the next grid
next<<<dim_grid, dim_block>>>(game, newGrid);
- cudaEventRecord(stopLife);
- cudaEventSynchronize(stopLife);
- cudaEventElapsedTime(&localTime, startLife, stopLife);
- timeComputingLife += localTime/1000;
+ cudaEventRecord(end);
+ cudaEventSynchronize(end);
+ cudaEventElapsedTime(&local_time, start, end);
+ time_computing_life += local_time/1000;
+ // Swap game.grid and newGrid
temp = game.grid;
game.grid = newGrid;
newGrid = temp;
}
if (log_each_step) {
+ // If we are logging each step, perform IO operations
gpuErrchk(cudaMemcpy(grid_h, game.grid, size, cudaMemcpyDeviceToHost));
#ifdef VERBOSE
+ // Print the board without the padding elements
printf("\n===Iteration %i===\n", i);
for (int y = game.padding; y < game.height+game.padding; y++) {
for (int x = game.padding; x < game.width+game.padding; x++) {
@@ -114,6 +127,7 @@ void simulate(int argc, char** argv) {
}
printf("===End iteration %i===\n", i);
#endif
+ // Save to a file
sprintf(iteration_file, "output/iteration-%07d.bin", i);
temp = game.grid;
game.grid = grid_h;
@@ -122,13 +136,7 @@ void simulate(int argc, char** argv) {
}
}
- clock_t totalEnd = clock();
- printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", timeComputingLife, ((double)totalEnd - (double)totalStart)/CLOCKS_PER_SEC);
-
- cudaFree(&newGrid);
- cudaFree(&grid_d);
- cudaFree(&game.grid);
- free(grid_h);
+ printf("\n===Timing===\nTime computing life: %f\nClock time: %f\n", time_computing_life, ((double)clock() - (double)global_start)/CLOCKS_PER_SEC);
}
int main(int argc, char** argv) {
diff --git a/cuda-global/timing-study/output--1000-1000.txt b/cuda-global/timing-study/output--1000-1000.txt
new file mode 100644
index 0000000..3792efc
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-1000.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.169687
+Clock time: 1.560000
diff --git a/cuda-global/timing-study/output--1000-1250.txt b/cuda-global/timing-study/output--1000-1250.txt
new file mode 100644
index 0000000..9081eb5
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-1250.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.254989
+Clock time: 2.240000
diff --git a/cuda-global/timing-study/output--1000-1500.txt b/cuda-global/timing-study/output--1000-1500.txt
new file mode 100644
index 0000000..c2fafe8
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-1500.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.354361
+Clock time: 3.050000
diff --git a/cuda-global/timing-study/output--1000-1750.txt b/cuda-global/timing-study/output--1000-1750.txt
new file mode 100644
index 0000000..557165e
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-1750.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.480174
+Clock time: 4.070000
diff --git a/cuda-global/timing-study/output--1000-2000.txt b/cuda-global/timing-study/output--1000-2000.txt
new file mode 100644
index 0000000..91a9d45
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-2000.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.619636
+Clock time: 5.220000
diff --git a/cuda-global/timing-study/output--1000-250.txt b/cuda-global/timing-study/output--1000-250.txt
new file mode 100644
index 0000000..0808a20
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-250.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.029867
+Clock time: 0.330000
diff --git a/cuda-global/timing-study/output--1000-500.txt b/cuda-global/timing-study/output--1000-500.txt
new file mode 100644
index 0000000..913b15e
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-500.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.059907
+Clock time: 0.540000
diff --git a/cuda-global/timing-study/output--1000-750.txt b/cuda-global/timing-study/output--1000-750.txt
new file mode 100644
index 0000000..e3b8c5d
--- /dev/null
+++ b/cuda-global/timing-study/output--1000-750.txt
@@ -0,0 +1,4 @@
+
+===Timing===
+Time computing life: 0.110954
+Clock time: 1.000000
diff --git a/cuda-global/timing-study/slurm-3610476.err-notch081 b/cuda-global/timing-study/slurm-3610476.err-notch081
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cuda-global/timing-study/slurm-3610476.err-notch081
diff --git a/cuda-global/timing-study/slurm-3610476.out-notch081 b/cuda-global/timing-study/slurm-3610476.out-notch081
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cuda-global/timing-study/slurm-3610476.out-notch081
diff --git a/cuda-global/timing-study/slurm-3611549.err-notch081 b/cuda-global/timing-study/slurm-3611549.err-notch081
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cuda-global/timing-study/slurm-3611549.err-notch081
diff --git a/cuda-global/timing-study/slurm-3611549.out-notch081 b/cuda-global/timing-study/slurm-3611549.out-notch081
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cuda-global/timing-study/slurm-3611549.out-notch081
diff --git a/cuda-global/timing-study/timing_study.sh b/cuda-global/timing-study/timing_study.sh
new file mode 100755
index 0000000..ef7ebcb
--- /dev/null
+++ b/cuda-global/timing-study/timing_study.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#SBATCH --time=0:30:00 # walltime, abbreviated by -t
+#SBATCH --nodes=1 # number of cluster nodes, abbreviated by -N
+#SBATCH -o slurm-%j.out-%N # name of the stdout, using the job number (%j) and the first node (%N)
+#SBATCH -e slurm-%j.err-%N # name of the stderr, using job and first node values
+#SBATCH --ntasks=1 # number of MPI tasks, abbreviated by -n
+# additional information for allocated clusters
+#SBATCH --account=notchpeak-shared-short # account - abbreviated by -A
+#SBATCH --partition=notchpeak-shared-short # partition, abbreviated by -p
+#SBATCH --gres=gpu:k80:1
+
+cd $HOME/gol/cuda-global
+
+iterations=1000
+for size in 250 500 750 1000 1250 1500 1750 2000
+do
+ srun ./gol simulate random $size $size $iterations 1 > timing-study/output-$cores-$iterations-$size.txt
+done