Multithreading - TBB matrix multiplication: stack overflow error in C++
I'm trying to implement matrix multiplication using tasks in Intel TBB; the algorithm I'm using is Strassen's algorithm.
Here is the code for main():
#include "matrix.h"
#include "tbb/tick_count.h"

#include <cstdlib>

using namespace tbb;
using namespace std;

// Multiplies `a` by `b` with the task-based algorithm and returns the result.
//
// A root mattask is allocated with the TBB task allocator, spawned, and waited
// for; the task writes its output through the pointer to `c`.
//
// a, b : input matrices.
// c    : matrix that receives the product; also returned by value (a copy).
// n    : dimension handed to the root task (the task halves it on every level).
matrix callparallel(matrix& a, matrix& b, matrix& c, int n){
    mattask& t = *new (task::allocate_root()) mattask(a, b, &c, n);
    task::spawn_root_and_wait(t);
    return c;
}

// Reads two matrices (generated automatically when both dimensions exceed 5,
// typed in by hand otherwise), multiplies them serially while timing the
// call, then runs the task-based multiplication and prints both results.
int main(){
    int rows, columns;
    matrix serialc;

    cout << "*******************\n" << "if rows , columns < 6 enter matric manualy\n" << "********************\n" <<endl;

    cout << "enter rows matrix a: ";
    cin >> rows;
    cout << "enter columns matrix a: ";
    cin >> columns;

    matrix a(rows, columns);
    if(rows > 5 && columns > 5){
        a.creatematrixautomatic();
    }else {
        a.creatematricmanualy();
    }

    cout << "enter rows matrix b: ";
    cin >> rows;
    cout << "enter columns matrix b: ";
    cin >> columns;

    matrix b(rows, columns);
    if(rows > 5 && columns > 5){
        b.creatematrixautomatic();
    }else {
        b.creatematricmanualy();
    }

    cout << "matrix a: " << endl;
    a.printmatrix();
    cout << "matrix b: " << endl;
    b.printmatrix();

    cout << "matrix c: " << endl;
    tick_count start_time = tick_count::now();
    serialc.multserial(a, b);
    tick_count end_time = tick_count::now();
    cout << "\ntime serial: " << (end_time - start_time).seconds() * 1000 << " ms" << endl;
    serialc.printmatrix();

    // Create the result matrix and call the parallel algorithm.
    // `rows`/`columns` hold B's dimensions at this point, so the product's
    // shape (rows of A x columns of B) is taken from the matrices themselves.
    matrix parallelc(a.vr, b.kol);
    parallelc = callparallel(a, b, parallelc, a.vr);

    // Print the result of the parallel multiplication.
    parallelc.printmatrix();

    system("pause");
}
Here is the matrix.cpp code:
#include "matrix.h"

#include <cstdlib>

// Creates an empty matrix. Both dimensions start at 0 so that printing or
// reading a default-constructed matrix never loops over indeterminate bounds.
// The element storage itself is left uninitialised.
matrix::matrix() : vr(0), kol(0) {}

// Creates a rows x columns matrix. Elements are not initialised here; callers
// fill them with creatematrixautomatic(), creatematricmanualy() or one of the
// arithmetic members.
// NOTE(review): no check that rows/columns fit the fixed-size element array
// declared in matrix.h — confirm callers never exceed it.
matrix::matrix(int rows, int columns) : vr(rows), kol(columns) {}

// Fills every element with a pseudo-random value in the range [1, 5].
void matrix::creatematrixautomatic(){
    for(int i = 0; i < vr; i++){
        for(int j = 0; j < kol; j++){
            int number = rand() % 5 + 1;
            matr[i][j] = number;
        }
    }
}

// Prompts for every element on standard output and reads it from standard
// input, row by row.
void matrix::creatematricmanualy(){
    cout << "enter elements: " << endl;
    for(int i = 0; i < vr; i++){
        for(int j = 0; j < kol; j++){
            cout << "enter [" << i << "]" << "[" << j << "] element: ";
            cin >> matr[i][j];
        }
    }
}

// Prints the matrix, one row per line, with a blank line after each row.
void matrix::printmatrix(){
    for(int i = 0; i < vr; i++){
        for(int j = 0; j < kol; j++){
            cout << matr[i][j] << " ";
        }
        cout << endl << endl;
    }
}

// Stores the product a * b in this matrix using the classic triple loop.
// The result takes its row count from `a` and its column count from `b`;
// the inner dimension is read from b.vr, so a.kol is expected to equal b.vr.
void matrix::multserial(matrix& a, matrix& b){
    // Set the result shape once, up front, so it is correct even when one of
    // the loops below runs zero times.
    vr = a.vr;
    kol = b.kol;

    for(int i = 0; i < a.vr; i++){
        for(int j = 0; j < b.kol; j++){
            matr[i][j] = 0;
            for(int k = 0; k < b.vr; k++){
                matr[i][j] += (a.matr[i][k] * b.matr[k][j]);
            }
        }
    }
}

// Stores a - b in the top-left dim x dim corner of this matrix.
// This matrix's own vr/kol are not modified.
void matrix::substract(matrix& a, matrix& b, int dim){
    for (int i = 0; i < dim; i++) {
        for (int j = 0; j < dim; j++) {
            matr[i][j] = a.matr[i][j] - b.matr[i][j];
        }
    }
}

// Stores a + b in the top-left dim x dim corner of this matrix.
// This matrix's own vr/kol are not modified.
void matrix::add(matrix& a, matrix& b, int dim){
    for (int i = 0; i < dim; i++) {
        for (int j = 0; j < dim; j++) {
            matr[i][j] = a.matr[i][j] + b.matr[i][j];
        }
    }
}
And here are the matrix and mattask classes (header matrica.h, included above as matrix.h):
#pragma once #include <iostream> #include <tbb/task.h> using namespace tbb; using namespace std; class matrix{ public: int vr, kol; int matr[100][100]; matrix(); matrix(int rows, int columns); void creatematrixautomatic(); void creatematricmanualy(); void printmatrix(); void multserial(matrix&, matrix&); void add(matrix& a, matrix& b, int dim); void substract(matrix& a, matrix& b, int dim); }; class mattask: public task{ public: matrix a, b; matrix* c; int dimension; mattask(matrix& _a, matrix& _b, matrix* _c, int dim): a(_a), b(_b), c(_c), dimension(dim){} task* execute(){ if(dimension == 1){ c->multserial(a, b); } else { int newdimension = dimension/2; task_list list; int count = 1; matrica a11(newdimension, newdimension), a12(newdimension, newdimension), a21(newdimension, newdimension), a22(newdimension, newdimension), b11(newdimension, newdimension), b12(newdimension, newdimension), b21(newdimension, newdimension), b22(newdimension, newdimension), *c11, *c12, *c21, *c22, p1(newdimension, newdimension), *p2, *p3, *p4, *p5, *p6, *p7, aresult(newdimension, newdimension), bresult(newdimension, newdimension); //delimo matrice u 4 podmatrice for(int = 0; < newdimension; i++){ for(int j = 0; j < newdimension; j++){ (a11).matr[i][j] = a.matr[i][j]; (a12).matr[i][j] = a.matr[i][j + newdimension]; (a21).matr[i][j] = a.matr[i + newdimension][j]; (a22).matr[i][j] = a.matr[i + newdimension][j + newdimension]; (b11).matr[i][j] = b.matr[i][j]; (b12).matr[i][j] = b.matr[i][j + newdimension]; (b21).matr[i][j] = b.matr[i + newdimension][j]; (b22).matr[i][j] = b.matr[i + newdimension][j + newdimension]; } } //racunamo p1...p7 //p1 = (a11 + a22) * (b11 + b22) aresult.add(a11, a22, newdimension); //a11 + a22 bresult.add(b11, b22, newdimension); //b11 + b22 count++; //mattask& = *new( allocate_child() ) mattask(aresult, bresult, &p1, newdimension); //lista.push_back(a); lista.push_back(*new (allocate_child()) mattask(aresult, bresult, &p1, newdimension)); //p2 = (a21 + a22) * b11 
//aresult.add(a21, a22, newdimension); //a21 + a22 //count++; ////lista.push_back(*new (allocate_child()) mattask(aresult, b11, p2, newdimension)); ////p3 = a11 * (b12 - b22) //bresult.substract(b12, b22, newdimension); // b12 - b22 //count++; ////lista.push_back(*new (allocate_child()) mattask(a11, bresult, p3, newdimension)); ////p4 = a22 * (b21 - b11) //bresult.substract(b21, b11, newdimension); // b21 - b11 //count++; ////lista.push_back(*new (allocate_child()) mattask(a22, bresult, p4, newdimension)); ////p5 = (a11 + a12) * b22 //aresult.add(a11, a12, newdimension); // a11 + a12 //count++; ////lista.push_back(*new (allocate_child()) mattask(aresult, b22, p5, newdimension)); ////p6 = (a21 - a11) * (b11 + b12) //bresult.add(b11, b12, newdimension); //b11 + b12 //aresult.substract(a21, a11, newdimension); //a21 - a11 //count++; ////lista.push_back(*new (allocate_child()) mattask(aresult, bresult, p6, newdimension)); ////p7 = (a12 - a22) * (b21 + b22) //bresult.add(b21, b22, newdimension); //b21 + b22 //aresult.substract(a12, a22, newdimension); //a12 - a22 //count++; ////lista.push_back(*new (allocate_child()) mattask(aresult, bresult, p7, newdimension)); set_ref_count(count); //spawn(a); spawn_and_wait_for_all(list); //spawn_and_wait_for_all(a); //racunamo d11, d12, d21, d22 //c11 = p1 + p4 - p5 + p7 //aresult.add(p1, p4, newdimension); // p1 + p4 //bresult.add(aresult, p7, newdimension); // p1 + p4 + p7 //c11.oduzmi(bresult, p5, newdimension); // c11 = p1 + p4 + p7 - p5 //// c12 = p3 + p5 //c12.add(p3, p5, newdimension); // //// c21 = p2 + p4 //c21.add(p2, p4, newdimension); //// c22 = p1 + p3 - p2 + p6 //aresult.add(p1, p3, newdimension); //p1 + p3 //bresult.add(aresult, p6, newdimension); //p1 + p3 + p6 //c22.substract(bresult, p2, newdimension); // c22 = p1 + p3 + p6 - p2 //grouping results obtained in single matrix: //for (int = 0; < novadimenzija ; i++) { // (int j = 0 ; j < novadimenzija ; j++) { // c.matr[i][j] = c11.matr[i][j]; // c.matr[i][j + 
newdimension] = c12.matr[i][j]; // c.matr[i + newdimension][j] = c21.matr[i][j]; // c.matr[i + newdimension][j + newdimension] = c22.matr[i][j]; // } //} } return null; } };
As you can see, the names of some functions and classes are not in English, but I don't think that is a problem, because the code is straightforward.
I get this error:
Unhandled exception at 0x01193787 in mnozenjematrica.exe: 0xC00000FD: Stack overflow.
I think the error occurs in the line spawn_and_wait_for_all(lista), but I'm not sure.
Can someone please take a look at the code and help me solve this problem? Maybe I'm not calling the functions correctly; I really don't know. Please help. Thank you.
It is the blocking-style parallelism plus the heavy use of stack-allocated matrices that results in the stack overflow. Each task reserves its data on the stack and then calls spawn_and_wait_for_all, which in turn executes another instance of the same task recursively, so the stack keeps growing.
Use continuation-style programming and avoid allocating huge data on the stack (and inside the task, if possible, since that reduces the efficiency of the task allocator).
Comments
Post a Comment