首页 > 解决方案 > Strassen-Winograd 算法

问题描述

我的任务是用 C++ 编写 Strassen-Winograd 算法。我已经写了两次,但我的代码的第一个版本不起作用。结果矩阵左下角的结果是正确的。即使 N = 64+,我的第二个版本的运行速度也比简单算法慢。

我究竟做错了什么?

重要提示:我不允许在递归和结构中使用动态矩阵。此外,最好不要复制,使用子矩阵角元素的坐标进行乘法运算。

不正确:

#include "pch.h"
#include<iostream>
#include<cstdio>
#include<conio.h>
#include<cstdlib>
#include<cmath>
#include<ctime>
#pragma comment(linker, "/STACK:5813243000")
using namespace std;
const int sizs = 256;

void vivod(int matrix[][256], int n);
void Matrix_Add(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2);
void Matrix_Sub(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2);
void Matrix_Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2, int n);
void strassen(int a[][256], int b[][256], int c[][256], int m, int n, int x1, int y1, int x2, int y2);
void Naive_Multiply(int a[][256], int b[][256], int c[][256], int n)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            c[i][j] = 0;
            for (int t = 0; t < n; t++) {
                c[i][j] = c[i][j] + a[i][t] * b[t][j];
            }
        }
    }
}
void Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2)
{
    for (int i = 0; i < 2; i++) {
        for (int j = 0; j < 2; j++) {
            c[i][j] = 0;
            for (int t = 0; t < 2; t++) {
                c[i][j] = c[i][j] + a[i][t] * b[t][j];
            }
        }
    }
}
int main()
{
    setlocale(LC_ALL, "Russian");
    int n;
    cout << "Введите число n:";
    cin >> n;
    const int m = 256;
    int A[m][m];
    int B[m][m];
    int C[m][m];
    int k[m][m];
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            A[i][j] = 0;
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            B[i][j] = 0;
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            A[i][j] = rand() % 10;
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            B[i][j] = rand() % 10;
    cout << "First Matrix:" << endl;
    vivod(A, n);
    cout << "Second Matrix:" << endl;
    vivod(B, n);
    int begin = clock();
    //for (int i =0; i < 100; i++)
    Naive_Multiply(A, B, k, n);
    int end = clock();
    cout << "Naive Multiply time: " << end - begin  << endl;
    vivod(k, n);
    int begin2 = clock();
    //for (int i = 0; i < 100; i++)
    strassen(A, B, C, n, n, 0, 0, 0, 0);
    int end2 = clock();
    cout << "time: " << end2 - begin2 << endl;
    vivod(C, n);
    system("pause");
    return 0;
}

void strassen(int a[][256], int b[][256], int c[][256], int m, int n, int x1, int y1, int x2, int y2) {
    m = n / 2;
    if (m != 1)
    {
        int s1[sizs][sizs];
        int s2[sizs][sizs];
        int s3[sizs][sizs];
        int s4[sizs][sizs];
        int s5[sizs][sizs];
        int s6[sizs][sizs];
        int s7[sizs][sizs];
        int s8[sizs][sizs];
        int m1[sizs][sizs];
        int m2[sizs][sizs];
        int m3[sizs][sizs];
        int m4[sizs][sizs];
        int m5[sizs][sizs];
        int m6[sizs][sizs];
        int m7[sizs][sizs];
        int t1[sizs][sizs];
        int t2[sizs][sizs];
        int c11[sizs][sizs];
        int c12[sizs][sizs];
        int c21[sizs][sizs];
        int c22[sizs][sizs];
        Matrix_Add(a, a, s1, m, n - m, n - 2 * m, n - m, n - m);
        Matrix_Sub(s1, a, s2, m, 0, 0, n - 2 * m, n - 2 * m);
        Matrix_Sub(a, a, s3, m, n - 2 * m, n - 2 * m, n - m, n - 2 * m);
        Matrix_Sub(a, s2, s4, m, n - 2 * m, n - m, 0, 0);
        Matrix_Sub(b, b, s5, m, n - 2 * m, n - m, n - 2 * m, n - 2 * m);
        Matrix_Sub(b, s5, s6, m, n - m, n - m, 0, 0);
        Matrix_Sub(b, b, s7, m, n - m, n - m, n - 2 * m, n - m);
        Matrix_Sub(s6, b, s8, m, 0, 0, n - m, n - 2 * m);
        strassen(s2, s6, m1, m, m, 0, 0, 0, 0);
        strassen(a, b, m2, m, m, n - 2 * m, n - 2 * m, n - 2 * m, n - 2 * m);
        strassen(a, b, m3, m, m, n - 2 * m, n - m, n - m, n - 2 * m);
        strassen(s3, s7, m4, m, m, 0, 0, 0, 0);
        strassen(s1, s5, m5, m, m, 0, 0, 0, 0);
        strassen(s4, b, m6, m, m, 0, 0, n - m, n - m);
        strassen(a, s8, m7, m, m, n - m, n - m, 0, 0);

        Matrix_Add(m1, m2, t1, m, 0, 0, 0, 0);
        Matrix_Add(t1, m4, t2, m, 0, 0, 0, 0);
        Matrix_Add(m2, m3, c11, m, 0, 0, 0, 0);
        Matrix_Sub(t2, m7, c21, m, 0, 0, 0, 0);
        Matrix_Add(t1, m5, c12, m, 0, 0, 0, 0);
        Matrix_Add(c12, m6, c12, m, 0, 0, 0, 0);
        Matrix_Add(t2, m5, c22, m, 0, 0, 0, 0);
        for (int i = 0; i < n / 2; i++)
        {
            for (int j = 0; j < n / 2; j++)
            {
                c[i + n - 2 * m][j + n - 2 * m] = c11[i][j];
                c[i + n - 2 * m][j + n - m] = c12[i][j];
                c[i + n - m][j + n - 2 * m] = c21[i][j];
                c[i + n - m][j + n - m] = c22[i][j];
            }
        }
    }
    else
    {
        Matrix_Multiply(a, b, c, x1, y1, x2, y2, n);
    }
}
void vivod(int matrix[][256], int n)
{
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            cout << matrix[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
}
void Matrix_Add(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            c[i][j] = a[i + x1][j + y1] + b[i + x2][j + y2];
        }
    }
}

void Matrix_Sub(int a[][256], int b[][256], int c[][256], int n, int x1, int y1, int x2, int y2)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            c[i][j] = a[i + x1][j + y1] - b[i + x2][j + y2];
        }
    }
}
void Matrix_Multiply(int a[][256], int b[][256], int c[][256], int x1, int y1, int x2, int y2, int n)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            c[i][j] = 0;
            for (int t = 0; t < n; t++) {
                c[i][j] = c[i][j] + a[x1 + i][y1 + t] * b[x2 + t][y2 + j];
            }
        }
    }
}

慢,最后期限默认1:

#include "pch.h"
#include <stdio.h>
#include <iostream>
#include<cstdlib>
#include<ctime>
#pragma comment(linker, "/STACK:44333338")
int deadline;
using namespace std;
const int n = 16;
void StrVin(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void add(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void sub(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size);
void Multiply(int a[n][n], int b[n][n], int c[n][n]);
void vivod(int a[n][n]);
void naivemultiplication(int a[n][n], int b[n][n], int c[n][n], int size);
int main()
{
    setlocale(LC_ALL, "Russian");
    cout << "Введите предел: ";
    cin >> deadline;
    int a[n][n];
    int b[n][n];
    int c[n][n];

    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
        {
            a[i][j] = rand() % 10;
            b[i][j] = rand() % 10;
            c[i][j] = 0;
        }
    }
    cout << "First Matrix" << endl;
    //vivod(a);
    cout << "Second Matrix" << endl;
    //vivod(b);
    cout << "Strassen-Winograd ";
    long double begin2 = clock();
    StrVin(a, b, c, n);
    long double end2 = clock();
    cout << "Time: " << (end2 - begin2) / CLOCKS_PER_SEC << endl;
    //vivod(c);
    cout << "Standart method ";
    long double begin1 = clock();
    naivemultiplication(a, b, c, n);
    long double end1 = clock();
    cout << "Time: " << (end1 - begin1) / CLOCKS_PER_SEC << endl;
    //vivod(c);
    system("pause");
    return 0;
}
void vivod(int a[n][n])
{
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
            cout << a[i][j] << " ";
        cout << endl;
    }
    cout << endl;
}

void add(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{
    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size; j++)
        {
            C[i][j] = A[i][j] + B[i][j];
        }
    }
}

void sub(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{

    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size; j++)
        {
            C[i][j] = A[i][j] - B[i][j];
        }
    }
}
void Multiply(int a[n][n], int b[n][n], int c[n][n])
{
    for (int i = 0; i < deadline*2; i++) {
        for (int j = 0; j < deadline*2; j++) {
            c[i][j] = 0;
            for (int t = 0; t < deadline*2; t++) {
                c[i][j] = c[i][j] + a[i][t] * b[t][j];
            }
        }
    }
}

void StrVin(int(&A)[n][n], int(&B)[n][n], int(&C)[n][n], int size)
{
    int half = size / 2;
    if (half != deadline)
    {
        int A11[n][n], A12[n][n], A21[n][n], A22[n][n], B11[n][n], B12[n][n], B21[n][n], B22[n][n], C11[n][n], C12[n][n], C21[n][n], C22[n][n];
        int S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n], S8[n][n];
        int P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
        int T1[n][n], T2[n][n];
        int K[n][n];
        for (int i = 0; i < half; i++) {
            for (int j = 0; j < half; j++) {
                A11[i][j] = A[i][j];
                A12[i][j] = A[i][j + half];
                A21[i][j] = A[i + half][j];
                A22[i][j] = A[i + half][j + half];
                B11[i][j] = B[i][j];
                B12[i][j] = B[i][j + half];
                B21[i][j] = B[i + half][j];
                B22[i][j] = B[i + half][j + half];
            }
        }
        for (int i = 0; i < half; i++)
        {
            for (int j = 0; j < half; j++)
            {
                C11[i][j] = C[i][j];
            }
        }
        for (int i = 0; i < half; i++)
        {
            for (int j = half; j < size; j++)
            {
                C12[i][j - half] = C[i][j];
            }
        }
        for (int i = half; i < size; i++)
        {
            for (int j = 0; j < half; j++)
            {
                C21[i - half][j] = C[i][j];
            }
        }
        for (int i = half; i < size; i++)
        {
            for (int j = half; j < size; j++)
            {
                C22[i - half][j - half] = C[i][j];
            }
        }
        add(A21, A22, S1, half);
        sub(S1, A11, S2, half);
        sub(A11, A21, S3, half);
        sub(A12, S2, S4, half);
        sub(B12, B11, S5, half);
        sub(B22, S5, S6, half);
        sub(B22, B12, S7, half);
        sub(S6, B21, S8, half);
        StrVin(S2, S6, P1, half);
        StrVin(A11, B11, P2, half);
        StrVin(A12, B21, P3, half);
        StrVin(S3, S7, P4, half);
        StrVin(S1, S5, P5, half);
        StrVin(S4, B22, P6, half);
        StrVin(A22, S8, P7, half);
        add(P1, P2, T1, half);
        add(T1, P4, T2, half);
        add(P2, P3, C11, half);
        add(T1, P5, K, half);
        add(K, P6, C12, half);
        sub(T2, P7, C21, half);
        add(T2, P5, C22, half);

        for (int i = 0; i < half; i++)
        {
            for (int j = 0; j < half; j++)
            {
                C[i][j] = C11[i][j];
                C[i][j + half] = C12[i][j];
                C[i + half][j] = C21[i][j];
                C[i + half][j + half] = C22[i][j];
            }
        }
    }
    else
    {
        Multiply(A, B, C);
    }
}

void naivemultiplication(int a[n][n], int b[n][n], int c[n][n], int size)
{
    for (int k = 0; k < size; k++)
    {
        for (int i = 0; i < size; i++)
        {
            c[k][i] = 0;
            for (int j = 0; j < size; j++)
            {
                c[k][i] += a[k][j] * b[j][i];
            }
        }
    }
}

标签: c++algorithmarray-algorithmsstrassen

解决方案


虽然它比经典的矩阵-矩阵产品渐进地快,但即使是写得非常好的 Strassen Winograd 算法也不会比小尺寸的天真的实现更快(并且在现代架构64×64上非常小)。n×n这是因为在递归过程中有相当大的矩阵加法开销。

这就是为什么所有优化的实现都有一个截止尺寸,在这个尺寸下它们将切换到优化的经典矩阵矩阵乘积。截止大小在很大程度上取决于架构和底层算法的质量,但它可能n远高于 512。

如果你想认真地实现,我建议阅读一些关于算法的详细描述(你可以从wikipedia开始,然后继续他们的参考)。如果您将其作为一个玩具项目进行,至少在某个阈值(您需要调整)下停止递归,并尝试更大的尺寸,直到您发现可测量的性能提升。

关于内存分配,由于您可以在 main 函数中分配内存,因此您可以计算临时矩阵所需的内存上限T*N^2*(1/4+1/16+...)(其中T是每个递归深度的临时数,N是矩阵的大小)和 re-在每个递归步骤中使用该内存。为了简化事情,从一个递归步骤开始,它立即切换到经典算法,直到你设法获得任何速度改进——再次阅读有关算法实现细节的现有文献。


推荐阅读