您的位置:首页 > 运维架构 > Linux

linux多线程编程--对三层for循环的优化

2014-12-09 09:33 323 查看
目标:将下面3层for循环的代码进行优化:

#include <iostream>
#include <vector>
#include <pthread.h>

using namespace std;

typedef vector< vector<long> > lvec;

long arr[1000][5000] = {};

long acculate(int, int);
lvec& forarr(lvec&, long, long);

// Serial baseline: fills a 1000 x 5000 table, then recomputes every cell
// with acculate() on the calling thread. Prints the last cell before and
// after the recomputation.
int main()
{
    // Row i initially holds 0, 1, ..., 4999.
    lvec ivec(1000);
    for (long i = 0; i < 1000; ++i) {
        ivec[i].reserve(5000);  // one allocation per row instead of repeated growth
        for (long j = 0; j < 5000; ++j)
            ivec[i].push_back(j);
    }
    cout << ivec[999][4999] << endl;

    // forarr(v, i, delta) rewrites columns [0, 2500 + delta) of row i in
    // place, so a single call with delta = 2500 covers the whole row.
    // No thread is started here: forarr does not have the
    // void* (*)(void*) signature pthread_create requires, and
    // pthread_create reports success as 0, not as a non-zero value.
    for (long i = 0; i < 1000; ++i)
        forarr(ivec, i, 2500);

    cout << ivec[999][4999] << endl;
    return 0;
}

// Overwrites columns [0, 2500 + delta) of row f in in_vec with
// acculate(f, column) and returns the same table by reference.
lvec& forarr(lvec& in_vec, long f, long delta) {
    const long limit = 2500 + delta;
    long col = 0;
    while (col < limit) {
        in_vec[f][col] = acculate(f, col);
        ++col;
    }
    return in_vec;
}

// Returns the value of point n in frame m: the sum 0 + 1 + ... + 4999
// plus m * n.
long acculate(int m, int n)
{
    // Innermost loop of the benchmark; kept as an explicit loop on purpose
    // (presumably as the per-point workload being timed).
    long sum = 0;
    for (long k = 0; k < 5000; ++k)
        sum += k;
    // Widen before multiplying so the product is formed in long rather
    // than overflowing int for large m and n.
    return sum + static_cast<long>(m) * n;
}
将array改成vector后,运行时间为88s。

下面用8个线程并行计算后,运行时间为11s,

直接贴代码:

#include <iostream>
#include <vector>
#include <pthread.h>
#include <sstream>

using namespace std;

typedef vector< vector<long> > lvec;

long arr[1000][5000] = {};

// Argument bundle passed to a worker thread through the void* parameter
// of its start routine.
struct para {
    lvec* longvec;     // table the worker writes into
    long f;            // frame (row) index to process
    long start_state;  // first column to compute (inclusive)
    long end_state;    // one past the last column to compute
};

long acculate(int, int);
void* forarr(void*);
vector<int> aver (int, int);

// Parallel version: fills a 1000 x 5000 table, then recomputes each row
// with 8 worker threads, every thread handling one contiguous slice of
// columns. Prints the last cell before and after the recomputation.
// Returns 0 on success, 1 if a worker thread could not be created.
int main()
{
    const long frame_count = 1000;
    const int state_count = 5000;

    // Initialisation: row i holds 0, 1, ..., state_count - 1.
    lvec ivec(frame_count);
    for (long i = 0; i < frame_count; ++i) {
        ivec[i].reserve(state_count);
        for (long j = 0; j < state_count; ++j)
            ivec[i].push_back(j);
    }
    cout << ivec[frame_count - 1][state_count - 1] << endl;

    // Run with 8 threads (the author found thread count == CPU count best).
    const int threads_num = 8;

    // aver() gives the number of columns per thread; the running total
    // turns those sizes into exclusive end indices.
    vector<int> i_vec = aver(state_count, threads_num);
    for (int t = 1; t < threads_num; ++t)
        i_vec[t] += i_vec[t - 1];

    // Slice bounds do not depend on the frame, so build the argument
    // structs once. The first slice starts at column 0; every other slice
    // starts where the previous one ended.
    vector<struct para> struct_vec(threads_num);
    for (int t = 0; t < threads_num; ++t) {
        struct_vec[t].longvec = &ivec;
        struct_vec[t].f = 0;
        struct_vec[t].start_state = (t == 0) ? 0 : i_vec[t - 1];
        struct_vec[t].end_state = i_vec[t];
    }
    vector<pthread_t> pthreadT_vec(threads_num);

    for (long i = 0; i < frame_count; ++i) {
        // Point every worker at the current frame. All threads of the
        // previous frame have been joined, so nobody is reading these.
        for (int t = 0; t < threads_num; ++t)
            struct_vec[t].f = i;

        // Start the workers; stop at the first failure.
        int started = 0;
        int err = 0;
        for (; started < threads_num; ++started) {
            err = pthread_create(&pthreadT_vec[started], NULL, forarr,
                                 &struct_vec[started]);
            if (err != 0)
                break;
        }

        // Wait only for the threads that were actually created.
        for (int t = 0; t < started; ++t)
            pthread_join(pthreadT_vec[t], NULL);

        if (err != 0) {
            std::cerr << "failed to create thread (error " << err << ")" << endl;
            return 1;
        }
    }

    cout << ivec[frame_count - 1][state_count - 1] << endl;
    return 0;
}

// Overwrites columns [st, ed) of row f in in_vec with acculate(f, column).
void forarr0(lvec& in_vec, long f, long st, long ed) {
    long col = st;
    while (col < ed) {
        in_vec[f][col] = acculate(f, col);
        ++col;
    }
}

void* forarr(void* paralist) {
struct para* p = (struct para*)paralist;
lvec* in_vec = p->longvec; // 用指针
long fra = p->f;
long start_s = p->start_state;
long end_s = p->end_state;
for (long j=start_s; j<end_s; ++j)
(*in_vec)[fra][j] = acculate(fra, j);
pthread_exit(NULL);
//return in_vec;
}

// Returns the value of point n in frame m: the sum 0 + 1 + ... + 4999
// plus m * n.
long acculate(int m, int n)
{
    // Explicit loop kept on purpose (presumably the per-point workload
    // whose cost the threads are meant to share).
    long sum = 0;
    for (long k = 0; k < 5000; ++k)
        sum += k;
    // Multiply in long so large m and n do not overflow int.
    return sum + static_cast<long>(m) * n;
}

// Splits x items into y shares that are as even as possible.
// Returns y share sizes: the first x % y shares get x / y + 1 items and
// the remaining ones get x / y. Returns an empty vector when y <= 0
// (there is nothing to split into, and y == 0 would divide by zero).
std::vector<int> aver (int x, int y) {
    std::vector<int> invec;
    if (y <= 0)
        return invec;

    invec.reserve(y);
    const int m = x / y;
    const int n = x % y;
    for (int i = 0; i < y; ++i)
        invec.push_back(i < n ? m + 1 : m);
    return invec;
}
我的感想:

1 当线程数小于等于CPU逻辑核心数时,运行时间与线程数基本成反比;我的电脑是4核8线程(共8个逻辑CPU),故用8个线程最为合适;线程数再增大,多出的线程其实也只能处于等待状态;

2 本程序只用到了linux下多线程处理的两个最基本函数pthread_create和pthread_join,因为这里各线程读写的数据互不重叠,没有冲突;当可能产生冲突时,就要用到加锁技术,见之后的学习。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: