diff options
author | Nao Pross <np@0hm.ch> | 2021-07-31 17:30:44 +0200 |
---|---|---|
committer | Nao Pross <np@0hm.ch> | 2021-07-31 17:30:44 +0200 |
commit | d7723ea05fe173c5203961dbc74aadf548338fb0 (patch) | |
tree | 12247fbfe674bc7b0f364c19b265ce8bb1059bb3 /buch/papers/multiplikation/code/MM | |
parent | Fix commas and details in references.bib (diff) | |
parent | Merge pull request #58 from Kuehnee/master (diff) | |
download | SeminarMatrizen-d7723ea05fe173c5203961dbc74aadf548338fb0.tar.gz SeminarMatrizen-d7723ea05fe173c5203961dbc74aadf548338fb0.zip |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to '')
-rwxr-xr-x | buch/papers/multiplikation/code/MM | bin | 0 -> 26848 bytes | |||
-rwxr-xr-x | buch/papers/multiplikation/code/MM.c | 465 | ||||
-rw-r--r-- | buch/papers/multiplikation/code/MM.py | 311 |
3 files changed, 776 insertions, 0 deletions
diff --git a/buch/papers/multiplikation/code/MM b/buch/papers/multiplikation/code/MM Binary files differnew file mode 100755 index 0000000..f07985f --- /dev/null +++ b/buch/papers/multiplikation/code/MM diff --git a/buch/papers/multiplikation/code/MM.c b/buch/papers/multiplikation/code/MM.c new file mode 100755 index 0000000..04c4dab --- /dev/null +++ b/buch/papers/multiplikation/code/MM.c @@ -0,0 +1,465 @@ +#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+#include <omp.h>
+#include "c_matrix.h"
+#include <gsl/gsl_cblas.h>
+#include <string.h>
+
+void MM(int *A, int *B, int *C, int n);
+void openMP_MM(int *A, int *B, int *C, int n);
+void winograd(int *A, int *B, int *C, int n);
+int winograd_inner(int *a, int *b, int n);
+void run_algo(void (*algo)(), char alog_name[], int print);
+void run_algo_cblas(int print);
+void MM_dc(int *A, int *B, int *C, int n);
+void strassen(int *A, int *B, int *C, int n);
+void printMatrix(int *C, int n);
+void printMatrix_double(double *C, int n);
+void split(int *in, int *out, int n, int col, int row);
+void join(int *in, int *out, int n, int col, int row);
+void add(int *A, int *B, int *C, int n);
+void sub(int *A, int *B, int *C, int n);
+void multiply(int *A, int *B, int *C, int n);
+
+int main() {
+ // omp_set_dynamic(0);
+ // omp_set_num_threads(4);
+// run_algo(openMP_MM, "openMP_MM",0);
+ run_algo(MM_dc, "MM_dc",0);
+ run_algo(strassen, "strassen",0);
+
+ run_algo(MM, "MM", 0);
+ // run_algo(winograd, "winograd", 0);
+ run_algo_cblas(0);
+
+ return 0;
+}
+
+void MM(int *A, int *B, int *C, int n) {
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ int sum = 0;
+ for (int k = 0; k < n; ++k) {
+ sum += (*((A + i * n) + k)) * (*((B + k * n) + j));
+ }
+ *((C + i * n) + j) = sum;
+ }
+ }
+}
+
+int winograd_inner(int *a, int *b, int n){
+ int ab = 0;
+ if(n%2==0)
+ {
+ int xi = 0;
+ int etha = 0;
+ for(int i = 0; i<n/2;++i)
+ {
+ xi += a[2*i]*a[2*i+1];
+ etha += b[2*i]*b[2*i+1];
+ ab += (a[2*i]+b[2*i+1])*(a[2*i+1]+b[2*i]);
+ }
+ ab = ab-etha-xi;
+ }
+ return ab;
+ }
+
+ void winograd(int *A, int *B, int *C, int n) {
+
+ int xi_array[n];
+ int etha_array[n];
+ int xi = 0;
+ int etha = 0;
+ int ab = 0;
+
+ for (int i = 0; i < n; ++i) {
+ xi = 0;
+ etha = 0;
+ for(int k = 0;k<n/2;++k)
+ {
+ xi += (*((A + i * n) + 2*k))*(*((A + i * n) + (2*k+1)));
+ etha += (*((B + 2*k * n) + i))*(*((B + (2*k+1) * n) + i));
+ }
+ xi_array[i] = xi;
+ etha_array[i] = etha;
+ }
+
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ ab = 0;
+ for(int k = 0;k<n/2;++k)
+ {
+ ab += ((*((A + i * n) + 2*k))+(*((B + (2*k+1) * n) + j)))*((*((A + i * n) + (2*k+1)))+(*((B + 2*k * n) + j)));
+ }
+ *((C + i * n) + j) = ab-etha_array[j]-xi_array[i];
+ }
+ }
+
+
+
+
+ // for (int i = 0; i < n; ++i) {
+ // int *a = (int*) malloc(n * sizeof(int));
+ // for(int k = 0; k<n; ++k)
+ // {
+ // a[k] = (*((A + i * n) + k));
+ // }
+ //
+ // for (int j = 0; j < n; ++j) {
+ // int *b = (int*) malloc(n * sizeof(int));
+ // for(int k = 0; k<n; ++k)
+ // {
+ // b[k] =(*((B + k * n) + j));
+ // }
+ // *((C + i * n) + j) = winograd_inner(a,b,n);
+ // }
+ // }
+ }
+
+
+void openMP_MM(int *A, int *B, int *C, int n) {
+
+ #pragma omp parallel for
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ int sum = 0;
+ for (int k = 0; k < n; ++k) {
+ sum += (*((A + i * n) + k)) * (*((B + k * n) + j));
+ }
+ *((C + i * n) + j) = sum;
+ }
+ }
+}
+
+void MM_dc(int *A, int *B, int *C, int n) {
+ if (n <= 2) {
+ MM((int*) A, (int*) B, (int*) C, n);
+ } else {
+ int *A11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ split((int*) A, (int*) A11, n / 2, 0, 0);
+ split((int*) A, (int*) A12, n / 2, 0, n / 2);
+ split((int*) A, (int*) A21, n / 2, n / 2, 0);
+ split((int*) A, (int*) A22, n / 2, n / 2, n / 2);
+ split((int*) B, (int*) B11, n / 2, 0, 0);
+ split((int*) B, (int*) B12, n / 2, 0, n / 2);
+ split((int*) B, (int*) B21, n / 2, n / 2, 0);
+ split((int*) B, (int*) B22, n / 2, n / 2, n / 2);
+
+ int *tmp1 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp2 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp3 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp4 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp5 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp6 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp7 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *tmp8 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ MM_dc((int*) A11, (int*) B11, (int*) tmp1, n / 2);
+ MM_dc((int*) A12, (int*) B21, (int*) tmp2, n / 2);
+ MM_dc((int*) A11, (int*) B12, (int*) tmp3, n / 2);
+ MM_dc((int*) A12, (int*) B22, (int*) tmp4, n / 2);
+ MM_dc((int*) A21, (int*) B11, (int*) tmp5, n / 2);
+ MM_dc((int*) A22, (int*) B21, (int*) tmp6, n / 2);
+ MM_dc((int*) A21, (int*) B12, (int*) tmp7, n / 2);
+ MM_dc((int*) A22, (int*) B22, (int*) tmp8, n / 2);
+
+ free(A11);
+ free(A12);
+ free(A21);
+ free(A22);
+ free(B11);
+ free(B12);
+ free(B21);
+ free(B22);
+
+ int *C11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ add((int*) tmp1, (int*) tmp2, (int*) C11, n / 2);
+ add((int*) tmp3, (int*) tmp4, (int*) C12, n / 2);
+ add((int*) tmp5, (int*) tmp6, (int*) C21, n / 2);
+ add((int*) tmp7, (int*) tmp8, (int*) C22, n / 2);
+
+ free(tmp1);
+ free(tmp2);
+ free(tmp3);
+ free(tmp4);
+ free(tmp5);
+ free(tmp6);
+ free(tmp7);
+ free(tmp8);
+
+ join((int*) C11, (int*) C, n / 2, 0, 0);
+ join((int*) C12, (int*) C, n / 2, 0, n / 2);
+ join((int*) C21, (int*) C, n / 2, n / 2, 0);
+ join((int*) C22, (int*) C, n / 2, n / 2, n / 2);
+
+ free(C11);
+ free(C12);
+ free(C21);
+ free(C22);
+
+ }
+}
+
+void strassen(int *A, int *B, int *C, int n) {
+ if (n <= 2) {
+
+ int P, Q, R, S, T, U, V;
+ P = ((*((A + 0 * n) + 0)) + (*((A + 1 * n) + 1)))
+ * ((*((B + 0 * n) + 0)) + (*((B + 1 * n) + 1)));
+ Q = ((*((A + 1 * n) + 0)) + (*((A + 1 * n) + 1)))
+ * ((*((B + 0 * n) + 0)));
+ R = ((*((A + 0 * n) + 0)))
+ * ((*((B + 0 * n) + 1)) - (*((B + 1 * n) + 1)));
+ S = ((*((A + 1 * n) + 1)))
+ * ((*((B + 1 * n) + 0)) - (*((B + 0 * n) + 0)));
+ T = ((*((A + 0 * n) + 0)) + (*((A + 0 * n) + 1)))
+ * ((*((B + 1 * n) + 1)));
+ U = ((*((A + 1 * n) + 0)) - (*((A + 0 * n) + 0)))
+ * ((*((B + 0 * n) + 0)) + (*((B + 0 * n) + 1)));
+ V = ((*((A + 0 * n) + 1)) - (*((A + 1 * n) + 1)))
+ * ((*((B + 1 * n) + 0)) + (*((B + 1 * n) + 1)));
+ (*((C + 0 * n) + 0)) = P + S - T + V;
+ (*((C + 0 * n) + 1)) = R + T;
+ (*((C + 1 * n) + 0)) = Q + S;
+ (*((C + 1 * n) + 1)) = P + R - Q + U;
+
+ } else {
+ int *A11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *A22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *B22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ split((int*) A, (int*) A11, n / 2, 0, 0);
+ split((int*) A, (int*) A12, n / 2, 0, n / 2);
+ split((int*) A, (int*) A21, n / 2, n / 2, 0);
+ split((int*) A, (int*) A22, n / 2, n / 2, n / 2);
+ split((int*) B, (int*) B11, n / 2, 0, 0);
+ split((int*) B, (int*) B12, n / 2, 0, n / 2);
+ split((int*) B, (int*) B21, n / 2, n / 2, 0);
+ split((int*) B, (int*) B22, n / 2, n / 2, n / 2);
+
+ int *P = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *Q = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *R = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *S = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *T = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *U = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *V = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ int *addA = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *addB = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ add((int*) A11, (int*) A22, (int*) addA, n / 2);
+ add((int*) B11, (int*) B22, (int*) addB, n / 2);
+ strassen((int*) addA, (int*) addB, (int*) P, n / 2);
+
+ add((int*) A21, (int*) A22, (int*) addA, n / 2);
+ strassen((int*) addA, (int*) B11, (int*) Q, n / 2);
+
+ sub((int*) B12, (int*) B22, (int*) addB, n / 2);
+ strassen((int*) A11, (int*) addB, (int*) R, n / 2);
+
+ sub((int*) B21, (int*) B11, (int*) addB, n / 2);
+ strassen((int*) A22, (int*) addB, (int*) S, n / 2);
+
+ add((int*) A11, (int*) A12, (int*) addA, n / 2);
+ strassen((int*) addA, (int*) B22, (int*) T, n / 2);
+
+ sub((int*) A21, (int*) A11, (int*) addA, n / 2);
+ add((int*) B11, (int*) B12, (int*) addB, n / 2);
+ strassen((int*) addA, (int*) addB, (int*) U, n / 2);
+
+ sub((int*) A12, (int*) A22, (int*) addA, n / 2);
+ add((int*) B21, (int*) B22, (int*) addB, n / 2);
+ strassen((int*) addA, (int*) addB, (int*) V, n / 2);
+
+ free(A11);
+ free(A12);
+ free(A21);
+ free(A22);
+ free(B11);
+ free(B12);
+ free(B21);
+ free(B22);
+ free(addA);
+ free(addB);
+
+ int *C11 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C12 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C21 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *C22 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ int *resAdd1 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+ int *resAdd2 = (int*) malloc(n / 2 * n / 2 * sizeof(int));
+
+ add((int*) R, (int*) T, (int*) C12, n / 2);
+ add((int*) Q, (int*) S, (int*) C21, n / 2);
+
+ add((int*) P, (int*) S, (int*) resAdd1, n / 2);
+ add((int*) resAdd1, (int*) V, (int*) resAdd2, n / 2);
+ sub((int*) resAdd2, (int*) T, (int*) C11, n / 2);
+
+ add((int*) P, (int*) R, (int*) resAdd1, n / 2);
+ add((int*) resAdd1, (int*) U, (int*) resAdd2, n / 2);
+ sub((int*) resAdd2, (int*) Q, (int*) C22, n / 2);
+
+ free(P);
+ free(Q);
+ free(R);
+ free(S);
+ free(T);
+ free(U);
+ free(V);
+ free(resAdd1);
+ free(resAdd2);
+
+ join((int*) C11, (int*) C, n / 2, 0, 0);
+ join((int*) C12, (int*) C, n / 2, 0, n / 2);
+ join((int*) C21, (int*) C, n / 2, n / 2, 0);
+ join((int*) C22, (int*) C, n / 2, n / 2, n / 2);
+
+ free(C11);
+ free(C12);
+ free(C21);
+ free(C22);
+ }
+}
+
+void add(int *A, int *B, int *C, int n) {
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ *((C + i * n) + j) = *((A + i * n) + j) + *((B + i * n) + j);
+ }
+ }
+}
+
+void sub(int *A, int *B, int *C, int n) {
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ *((C + i * n) + j) = *((A + i * n) + j) - *((B + i * n) + j);
+ }
+ }
+}
+
+void multiply(int *A, int *B, int *C, int n) {
+ int mul;
+
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ mul = (*((A + i * n) + j)) * (*((B + i * n) + j));
+ *((C + i * n) + j) = mul;
+ }
+ }
+}
+
+void split(int *in, int *out, int n, int col, int row) {
+ for (int i1 = 0, i2 = col; i1 < n; i1++, i2++)
+ for (int j1 = 0, j2 = row; j1 < n; j1++, j2++) {
+ *((out + i1 * n) + j1) = *((in + i2 * n * 2) + j2);
+
+ }
+}
+
+void join(int *in, int *out, int n, int col, int row) {
+ for (int i1 = 0, i2 = col; i1 < n; i1++, i2++)
+ for (int j1 = 0, j2 = row; j1 < n; j1++, j2++)
+ *((out + i2 * n * 2) + j2) = *((in + i1 * n) + j1);
+}
+
+void printMatrix(int *C, int n) {
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ printf("%d ", *((C + i * n) + j));
+ }
+ printf("\n");
+ }
+}
+
+void printMatrix_double(double *C, int n) {
+ for (int i = 0; i < n; ++i) {
+ for (int j = 0; j < n; ++j) {
+ printf("%.0f ", *((C + i * n) + j));
+ }
+ printf("\n");
+ }
+}
+
+void run_algo(void (*algo)(), char alog_name[], int print)
+{
+ FILE *fptr;
+
+ char fileName[40] = "meas/";
+ strcat(fileName, alog_name);
+ strcat(fileName, ".txt");
+ fptr = fopen(fileName, "w");
+
+
+ for(int i=0; i<n_arrays; ++i)
+ {
+ for(int j = 0; j<1; ++j)
+ {
+ int *C = (int*) malloc(n[i] * n[i] * sizeof(int));
+ double dtime = omp_get_wtime();
+ algo(Ap[i], Bp[i], (int*) C, n[i]);
+ dtime = omp_get_wtime() - dtime;
+ // printf("The %s program took %f seconds to execute \n", alog_name, dtime);
+ fprintf(fptr, "%f,%d\n", dtime, n[i]);
+
+ if(print==1)
+ {
+ printMatrix((int*)C, n[i]);
+ }
+ free(C);
+ }
+ }
+ fclose(fptr);
+
+}
+
+void run_algo_cblas(int print)
+{
+
+ FILE *fptr;
+
+ fptr = fopen("meas/blas.txt", "w");
+ for(int i=0; i<n_arrays; ++i)
+ {
+ for(int j = 0; j<1; ++j)
+ {
+ double *dC = (double*) malloc(n[i] * n[i] * sizeof(double));
+ double dtime = omp_get_wtime();
+ cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n[i], n[i], n[i], 1.0, dAp[i], n[i],
+ dBp[i], n[i], 0.0, dC, n[i]);
+ dtime = omp_get_wtime() - dtime;
+ // printf("The cblas program took %f seconds to execute \n", dtime);
+ fprintf(fptr, "%f,%d\n",dtime, n[i]);
+
+ if(print==1)
+ {
+ printMatrix_double( (double*)dC, n[i]);
+ }
+
+ free(dC);
+ }
+ }
+ fclose(fptr);
+
+}
diff --git a/buch/papers/multiplikation/code/MM.py b/buch/papers/multiplikation/code/MM.py new file mode 100644 index 0000000..626b82d --- /dev/null +++ b/buch/papers/multiplikation/code/MM.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 19 07:31:29 2021 + +@author: nunigan +""" +import numpy as np +import time +import matplotlib.pyplot as plt +from scipy.optimize import curve_fit +import tikzplotlib +def MM(A, B): + n = np.shape(A)[0] + C = np.zeros((n, n)) + for i in range(n): + for j in range(n): + C[i, j] = 0 + for k in range(n): + C[i, j] += A[i, k]*B[k, j] + return C + + +def MM_dc(A, B): + n = np.shape(A)[0] + if(n <= 2): + C = np.zeros((n, n)) + C[0, 0] = A[0, 0]*B[0, 0]+A[0, 1]*B[1, 0] + C[0, 1] = A[0, 0]*B[0, 1]+A[0, 1]*B[1, 1] + C[1, 0] = A[1, 0]*B[0, 0]+A[1, 1]*B[1, 0] + C[1, 1] = A[1, 0]*B[0, 1]+A[1, 1]*B[1, 1] + return C + else: + A11, A12, A21, A22 = A[:n//2, :n//2], A[:n//2, n//2:], A[n//2:, :n//2], A[n//2:, n//2:] + B11, B12, B21, B22 = B[:n//2, :n//2], B[:n//2, n//2:], B[n//2:, :n//2], B[n//2:, n//2:] + C11 = MM_dc(A11, B11) + MM_dc(A12, B21) + C12 = MM_dc(A11, B12) + MM_dc(A12, B22) + C21 = MM_dc(A21, B11) + MM_dc(A22, B21) + C22 = MM_dc(A21, B12) + MM_dc(A22, B22) + C = np.vstack((np.hstack((C11, C12)), np.hstack((C21, C22)))) + return C + + +def strassen(A, B): + n = np.shape(A)[0] + if(n <= 2): + C = np.zeros((n, n)) + P = (A[0, 0]+A[1, 1])*(B[0, 0]+B[1, 1]) + Q = (A[1, 0]+A[1, 1])*B[0, 0] + R = A[0, 0]*(B[0, 1]-B[1, 1]) + S = A[1, 1]*(B[1, 0]-B[0, 0]) + T = (A[0, 0]+A[0, 1])*B[1, 1] + U = (A[1, 0]-A[0, 0])*(B[0, 0]+B[0, 1]) + V = (A[0, 1]-A[1, 1])*(B[1, 0]+B[1, 1]) + C[0, 0] = P+S-T+V + C[0, 1] = R+T + C[1, 0] = Q+S + C[1, 1] = P+R-Q+U + return C + else: + m = n//2 + A11, A12, A21, A22 = A[:m, :m], A[:m, m:], A[m:, :m], A[m:, m:] + B11, B12, B21, B22 = B[:m, :m], B[:m, m:], B[m:, :m], B[m:, m:] + P = strassen((A11+A22),(B11+B22)) + Q = strassen((A21+A22),B11) + R = strassen(A11,(B12-B22)) + S = strassen(A22,(B21-B11)) + T = strassen((A11+A12),B22) + U = strassen((A21-A11),(B11+B12)) + V = strassen((A12-A22),(B21+B22)) + + C11 = P+S-T+V + C12 = R+T + C21 = Q+S + C22 = P+R-Q+U + + C = np.vstack((np.hstack((C11, C12)), np.hstack((C21, C22)))) + return C + +def winograd_inner(a, b): + n = np.shape(a)[0] + if n%2 == 0: + xi = np.sum(a[::2]*a[1::2]) + etha = np.sum(b[::2]*b[1::2]) + # print("xi = {}, etha = {}".format(xi, etha)) + ab = np.sum((a[::2]+b[1::2])*(a[1::2]+b[::2]))-xi-etha + else: + xi = np.sum(a[0:-1:2]*a[1::2]) + etha = np.sum(b[0:-1:2]*b[1::2]) + ab = np.sum((a[0:-1:2]+b[1::2])*(a[1::2]+b[0:-1:2]))-xi-etha+a[-1]*b[-1] + return ab + +def winograd(A, B): + m,n = np.shape(A) + n2,p = np.shape(B) + C = np.zeros((m,p)) + for i in range(np.shape(A)[0]): + for j in range(np.shape(B)[1]): + C[i,j] = winograd_inner(A[i,:], B[:,j]) + return C + +def winograd2(A, B): + m,n = np.shape(A) + n2,p = np.shape(B) + C = np.zeros((m,p)) + xi = np.zeros((m)) + eta = np.zeros((p)) + ab = 0 + for i in range(m): + for j in range(n//2): + xi[i] += A[i,2*j]*A[i,2*j+1] + + for i in range(p): + for j in range(n//2): + eta[i] += B[2*j,i]*B[2*j+1,i] + + if n%2==0: + for i in range(m): + for j in range(p): + ab = 0 + for k in range(n//2): + ab += (A[i,2*k]+B[2*k+1,j])*(A[i,2*k+1]+B[2*k,j]) + C[i,j] = ab-eta[j]-xi[i] + else: + for i in range(m): + for j in range(p): + ab = 0 + for k in range(n//2): + ab += (A[i,2*k]+B[2*k+1,j])*(A[i,2*k+1]+B[2*k,j]) + C[i,j] = ab-eta[j]-xi[i]+A[i,-1]*B[-1,j] + + return C + +def test_perfomance(n): + t_mm = [] + t_mm_dc = [] + t_mm_strassen = [] + t_wino = [] + t_np = [] + + for i in n: + A = np.random.randn(i, i) + B = np.random.randn(i, i) + # A = np.random.randint(-100, 100,(i, i)) + # B = np.random.randint(-100, 100,(i, i)) + + start = time.time() + C3 = strassen(A, B) + t_mm_strassen.append(time.time() - start) + + start = time.time() + C1 = MM(A, B) + t_mm.append(time.time() - start) + + start = time.time() + C2 = MM_dc(A, B) + t_mm_dc.append(time.time() - start) + + start = time.time() + C4 = winograd2(A, B) + t_wino.append(time.time() - start) + + start = time.time() + C = A@B + t_np.append(time.time() - start) + + plt.figure(figsize=(13,8)) + plt.rcParams['font.family'] = 'STIXGeneral' + plt.rc('axes', labelsize=23) + plt.rc('xtick', labelsize=23) + plt.rc('ytick', labelsize=23) + plt.plot(n, t_mm, label='Standard', lw=5) + plt.plot(n, t_mm_dc, label='Divide and conquer', lw=5) + plt.plot(n, t_mm_strassen, label='Strassen', lw=5) + plt.plot(n, t_wino, label='Winograd', lw=5) + plt.plot(n, t_np, label='NumPy A@B', lw=5) + plt.legend() + plt.xlabel("n") + plt.ylabel("time (s)") + plt.grid(True) + plt.tight_layout() + # plt.yscale('log') + plt.legend(fontsize=19) + plt.savefig('meas_' + str(max(n))+ '.pdf') + arr = np.array([n, t_mm, t_mm_dc, t_mm_strassen, t_wino, t_np]) + np.savetxt('meas_' + str(max(n))+ '.txt',arr) + return arr + + +def plot(num): + arr = np.loadtxt('meas_{}.txt'.format(num)) + n, t_mm, t_mm_dc, t_mm_strassen, t_wino, t_np = arr + plt.figure(figsize=(13,8)) + plt.rcParams['font.family'] = 'STIXGeneral' + plt.rc('axes', labelsize=23) + plt.rc('xtick', labelsize=23) + plt.rc('ytick', labelsize=23) + plt.plot(n, t_mm, label='3 For Loops', lw=5) + plt.plot(n, t_mm_dc, label='Divide and Conquer', lw=5) + plt.plot(n, t_mm_strassen, label='Strassen', lw=5) + # plt.plot(n, t_wino, label='Winograd', lw=5) + plt.plot(n, t_np, label='NumPy A@B', lw=5) + plt.legend() + plt.xlabel("n") + plt.ylabel("time (s)") + plt.grid(True) + plt.tight_layout() + # plt.yscale('log') + plt.legend(fontsize=19) + plt.savefig('meas_' + str(num)+ '.pdf') + return arr + +def plot_c_res(ave, num): + MM = np.loadtxt("meas/MM.txt", delimiter=',') + # winograd = np.loadtxt("meas/winograd.txt", delimiter=',') + blas = np.loadtxt("meas/blas.txt", delimiter=',') + MM_dc = np.loadtxt("meas/MM_dc.txt", delimiter=',') + strassen = np.loadtxt("meas/strassen.txt", delimiter=',') + + MM_t = MM[:,0] + MM_n = MM[:,1] + MM_t = np.mean(MM_t.reshape(-1,ave),axis=1) + MM_n = np.mean(MM_n.reshape(-1,ave),axis=1) + + MM_dc_t = MM_dc[:,0] + MM_dc_n = MM_dc[:,1] + MM_dc_t = np.mean(MM_dc_t.reshape(-1,ave),axis=1) + MM_dc_n = np.mean(MM_dc_n.reshape(-1,ave),axis=1) + + strassen_t = strassen[:,0] + strassen_n = strassen[:,1] + strassen_t = np.mean(strassen_t.reshape(-1,ave),axis=1) + strassen_n = np.mean(strassen_n.reshape(-1,ave),axis=1) + + # winograd_t = winograd[:,0] + # winograd_n = winograd[:,1] + # winograd_t = np.mean(winograd_t.reshape(-1,ave),axis=1) + # winograd_n = np.mean(winograd_n.reshape(-1,ave),axis=1) + + blas_t = blas[:,0] + blas_n = blas[:,1] + blas_t = np.mean(blas_t.reshape(-1,ave),axis=1) + blas_n = np.mean(blas_n.reshape(-1,ave),axis=1) + + def func(x, a,b): + return b*x**a + + # popt, pcov = curve_fit(func, blas_n, blas_t) + # popt1, pcov2 = curve_fit(func, blas_n, winograd_t) + # popt2, pcov2 = curve_fit(func, blas_n, MM_t) + + plt.figure(figsize=(13,8)) + plt.rcParams['font.family'] = 'STIXGeneral' + plt.rc('axes', labelsize=23) + plt.rc('xtick', labelsize=23) + plt.rc('ytick', labelsize=23) + plt.plot(MM_n, MM_t, label='3 For Loops', lw=5) + # plt.plot(winograd_n, winograd_t, label='Winograd MM', lw=5) + plt.plot(blas_n, blas_t, label='Blas', lw=5) + plt.plot(strassen_n, strassen_t, label='Strassen', lw=5) + plt.plot(MM_dc_n, MM_dc_t, label='Divide and Conquer', lw=5) + plt.xlabel("n") + plt.ylabel("time (s)") + plt.grid(True) + plt.tight_layout() + plt.legend(fontsize=19) + plt.savefig('c_meas_' + str(num)+ '.pdf') + + # plt.plot(blas_n, func(blas_n, *popt), 'r-', label='fit blas: a=%5.5f, b=%5.10f' % tuple(popt)) + # plt.plot(blas_n, func(blas_n, *popt1), 'r-', label='fit winograd: a=%5.5f, b=%5.10f' % tuple(popt1)) + # plt.plot(blas_n, func(blas_n, *popt2), 'r-', label='fit MM: a=%5.5f, b=%5.10f' % tuple(popt2)) + + plt.legend() + + +# test%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if __name__ == '__main__': + plot_c_res(1, 4096) + + + # plot(8) + # n = np.logspace(1,10,10,base=2,dtype=(np.int)) + # n = np.arange(1,50,2) + A = np.random.randint(-10, 10, (5,3)) + B = np.random.randint(-10, 10, (3,5)) + + C = winograd2(A, B) + C_test = A@B + print(C) + print(C_test) + # print(np.equal(C, C_test)) + + # t_np = test_perfomance(n) + # C = strassen(A, B) + # C_test = A@B + + + # plot_c_res() + # def func(x, a): + # return x**a + + # popt, pcov = curve_fit(func, n, t_np, bounds=(2, 3)) + + + # plt.figure() + # plt.plot(n, t_np, 'b-', label='data') + # plt.plot(n, func(n, *popt), 'r-', label='fit: a=%5.3f' % tuple(popt)) + # plt.xlabel('x') + # plt.ylabel('y') + # plt.legend() +
\ No newline at end of file |