diff options
Diffstat (limited to 'homeworks/hw-9.tex')
-rw-r--r-- | homeworks/hw-9.tex | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/homeworks/hw-9.tex b/homeworks/hw-9.tex new file mode 100644 index 0000000..9d5693a --- /dev/null +++ b/homeworks/hw-9.tex @@ -0,0 +1,250 @@ +% Created 2023-12-11 Mon 19:24 +% Intended LaTeX compiler: pdflatex +\documentclass[11pt]{article} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} +\usepackage{longtable} +\usepackage{wrapfig} +\usepackage{rotating} +\usepackage[normalem]{ulem} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{capt-of} +\usepackage{hyperref} +\notindent \notag \usepackage{amsmath} \usepackage[a4paper,margin=1in,portrait]{geometry} +\author{Elizabeth Hunt} +\date{\today} +\title{Homework 9} +\hypersetup{ + pdfauthor={Elizabeth Hunt}, + pdftitle={Homework 9}, + pdfkeywords={}, + pdfsubject={}, + pdfcreator={Emacs 28.2 (Org mode 9.7-pre)}, + pdflang={English}} +\begin{document} + +\maketitle +\setlength\parindent{0pt} + +\section{Question One} +\label{sec:org69bed2d} + +With a \texttt{matrix\_dimension} set to 700, I consistently see about a 3x improvement in performance on my +10-thread machine. The serial implementation gives an average \texttt{0.189s} total runtime, while the below +parallel implementation runs in about \texttt{0.066s} after the cpu cache has filled on the first run. + +\begin{verbatim} +#include <math.h> +#include <omp.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#define matrix_dimension 700 + +int n = matrix_dimension; +float sum; + +int main() { + float A[n][n]; + float x0[n]; + float b[n]; + float x1[n]; + float res[n]; + + srand((unsigned int)(time(NULL))); + + // not worth parallellization - rand() is not thread-safe + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + A[i][j] = ((float)rand() / (float)(RAND_MAX) * 5.0); + } + x0[i] = ((float)rand() / (float)(RAND_MAX) * 5.0); + } + +#pragma omp parallel for private(sum) + for (int i = 0; i < n; i++) { + sum = 0.0; + for (int j = 0; j < n; j++) { + sum += fabs(A[i][j]); + } + A[i][i] += sum; + } + +#pragma omp parallel for private(sum) + for (int i = 0; i < n; i++) { + sum = 0.0; + for (int j = 0; j < n; j++) { + sum += A[i][j]; + } + b[i] = sum; + } + + float tol = 0.0001; + float error = 10.0 * tol; + int maxiter = 100; + int iter = 0; + + while (error > tol && iter < maxiter) { +#pragma omp parallel for + for (int i = 0; i < n; i++) { + float temp_sum = b[i]; + for (int j = 0; j < n; j++) { + temp_sum -= A[i][j] * x0[j]; + } + res[i] = temp_sum; + x1[i] = x0[i] + res[i] / A[i][i]; + } + + sum = 0.0; +#pragma omp parallel for reduction(+ : sum) + for (int i = 0; i < n; i++) { + float val = x1[i] - x0[i]; + sum += val * val; + } + error = sqrt(sum); + +#pragma omp parallel for + for (int i = 0; i < n; i++) { + x0[i] = x1[i]; + } + + iter++; + } + + for (int i = 0; i < n; i++) + printf("x[%d] = %6f \t res[%d] = %6f\n", i, x1[i], i, res[i]); + + return 0; +} + +\end{verbatim} + +\section{Question Two} +\label{sec:orgbeace25} + +I only see lowerings in performance (likely due to overhead) on my machine using OpenMP until +\texttt{matrix\_dimension} becomes quite large, about \texttt{300} in testing. At \texttt{matrix\_dimension=1000}, I see another +about 3x improvement in total runtime (including initialization \& I/O which was untouched, so, even further +improvements could be made) on my 10-thread machine; from around \texttt{0.174} seconds to \texttt{.052}. + +\begin{verbatim} +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#ifdef _OPENMP +#include <omp.h> +#else +#define omp_get_num_threads() 0 +#define omp_set_num_threads(int) 0 +#define omp_get_thread_num() 0 +#endif + +#define matrix_dimension 1000 + +int n = matrix_dimension; +float ynrm; + +int main() { + float A[n][n]; + float v0[n]; + float v1[n]; + float y[n]; + // + // create a matrix + // + // not worth parallellization - rand() is not thread-safe + srand((unsigned int)(time(NULL))); + float a = 5.0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + A[i][j] = ((float)rand() / (float)(RAND_MAX)*a); + } + v0[i] = ((float)rand() / (float)(RAND_MAX)*a); + } + // + // modify the diagonal entries for diagonal dominance + // -------------------------------------------------- + // + for (int i = 0; i < n; i++) { + float sum = 0.0; + for (int j = 0; j < n; j++) { + sum = sum + fabs(A[i][j]); + } + A[i][i] = A[i][i] + sum; + } + // + // generate a vector of ones + // ------------------------- + // + for (int j = 0; j < n; j++) { + v0[j] = 1.0; + } + // + // power iteration test + // -------------------- + // + float tol = 0.0000001; + float error = 10.0 * tol; + float lam1, lam0; + int maxiter = 100; + int iter = 0; + + while (error > tol && iter < maxiter) { +#pragma omp parallel for + for (int i = 0; i < n; i++) { + y[i] = 0; + for (int j = 0; j < n; j++) { + y[i] = y[i] + A[i][j] * v0[j]; + } + } + + ynrm = 0.0; +#pragma omp parallel for reduction(+ : ynrm) + for (int i = 0; i < n; i++) { + ynrm += y[i] * y[i]; + } + ynrm = sqrt(ynrm); + +#pragma omp parallel for + for (int i = 0; i < n; i++) { + v1[i] = y[i] / ynrm; + } + +#pragma omp parallel for + for (int i = 0; i < n; i++) { + y[i] = 0.0; + for (int j = 0; j < n; j++) { + y[i] += A[i][j] * v1[j]; + } + } + + lam1 = 0.0; +#pragma omp parallel for reduction(+ : lam1) + for (int i = 0; i < n; i++) { + lam1 += v1[i] * y[i]; + } + + error = fabs(lam1 - lam0); + lam0 = lam1; + +#pragma omp parallel for + for (int i = 0; i < n; i++) { + v0[i] = v1[i]; + } + + iter++; + } + + printf("in %d iterations, eigenvalue = %f\n", iter, lam1); +} +\end{verbatim} + +\section{Question Three} +\label{sec:org33439e0} +\url{https://static.simponic.xyz/lizfcm.pdf} +\end{document}
\ No newline at end of file |