您的位置:首页 > 其它

螺旋矩阵 之三

2010-12-31 23:35 155 查看
问题:如何高效地构建一个螺旋矩阵?

前面的文章讨论了两种螺旋矩阵。当N比较小时,可以用模拟法(测试代码中的build_1a和build_1b函数),另外可以将4个for循环体合并为一个(build_2a、build_2b和build_2c函数)。但N比较大时,由于不断地对内存进行跳跃式访问,CPU cache line命中率很低,定位和载入内存的开销相当大。一种解决方法是,直接计算每个位置对应的值(build_3a和build_3b函数);另一种解决方法则是:将每行拆分成三部分,第一部分等于上一行同一列数值减1,中间部分是一段连续的递增或递减的数列(其起始和结束值可由公式算得),最后一部分等于上一行同一列数值加1(build_4函数)。为了测试方便,加了一个basic_build函数,按先行后列的顺序填充1到N²的等差数列。

测试结果有点出乎意料,效率最高的basic_build、build_3a、build_3b和build_4这几个函数所用时间相当接近,其它几个函数的效率彼此间也相差不大。由于程序的性能瓶颈在于内存访问的效率,二维数组的布局、CPU的缓存大小、内存页的大小等都对测试结果有很大影响,使得测试结果不够精确。下面仅列出一个极端情况下的结果:

值得注意的是,用模拟法构建 5120 * 5120 时,所用时间是 构建 5121 * 5121 的3倍多。

各种方法构建N*N矩阵所用时间(ms)

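| 方法 | N = 5119 | N = 5120 | N = 5121 |
| --- | --- | --- | --- |
| build_1a | 387 | 1259 | 331 |
| build_1b | 390 | 1259 | 331 |
| build_2a | 418 | 1259 | 325 |
| build_2b | 375 | 1256 | 312 |
| build_2c | 371 | 1187 | 312 |
| build_3a | 140 | 137 | 137 |
| build_3b | 134 | 134 | 134 |
| build_4 | 134 | 134 | 162 |
| basic | 134 | 131 | 134 |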

测试代码:

代码:
// www.cnblogs.com/flyinghearts
#include<iostream>
#include<algorithm>
#include<vector>
#include<ctime>
#include<windows.h>

using std::min;
using std::vector;
using std::cout;

// Side length of the shared buffer; main() also uses it as the benchmark size.
const int N = 5120;

// Shared N x N output buffer that every builder writes into and print() reads.
// It has static storage duration, so it starts zero-initialised and does not
// live on the stack.
// NOTE(review): the builders index arr[i][j] for all i, j < n without any
// bounds check, so callers must keep n <= N.
int arr[N][N];

// Baseline builder: fills the top-left n x n corner of arr with 1, 2, ..., n*n
// in row-major order (each row left to right, rows top to bottom).
void basic_build(int n)
{
    int next = 1;
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            arr[row][col] = next;
            ++next;
        }
    }
}

// Simulation builder: walks the clockwise spiral ring by ring, starting at the
// top-left corner, and writes one running counter into arr as it goes.
// Each ring is traced as four edges of equal length, each edge stopping one
// cell short of the next corner.
void build_1a(int n)
{
    const int rings = n / 2u;
    int value = 1;  // next number to place
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;  // index of the ring's far row/column

        // Top edge, left to right.
        for (int col = ring; col < last; ++col) {
            arr[ring][col] = value++;
        }
        // Right edge, top to bottom.
        for (int row = ring; row < last; ++row) {
            arr[row][last] = value++;
        }
        // Bottom edge, right to left.
        for (int col = last; col > ring; --col) {
            arr[last][col] = value++;
        }
        // Left edge, bottom to top.
        for (int row = last; row > ring; --row) {
            arr[row][ring] = value++;
        }
    }
    // An odd-sized matrix has a single centre cell left over.
    if (n & 1) {
        arr[rings][rings] = value;
    }
}

// Simulation builder without a running counter: the first value of ring r is
// computed as 4*r*(n-r) + 1, and each of the four edges starts a whole number
// of edge lengths after it. Edges are still written one after another, in the
// same order and direction as build_1a.
void build_1b(int n)
{
    const int rings = n / 2u;
    for (int ring = 0; ring < rings; ++ring) {
        const int last = n - 1 - ring;
        const int side = last - ring;  // cells per edge (corner counted once)
        const int topStart = 4 * ring * (n - ring) + 1;
        const int rightStart = topStart + side;
        const int bottomStart = topStart + 2 * side;
        const int leftStart = topStart + side * 3;

        // Top edge, left to right.
        for (int off = 0; off < side; ++off) {
            arr[ring][ring + off] = topStart + off;
        }
        // Right edge, top to bottom.
        for (int off = 0; off < side; ++off) {
            arr[ring + off][last] = rightStart + off;
        }
        // Bottom edge, right to left.
        for (int off = 0; off < side; ++off) {
            arr[last][last - off] = bottomStart + off;
        }
        // Left edge, bottom to top.
        for (int off = 0; off < side; ++off) {
            arr[last - off][ring] = leftStart + off;
        }
    }
    // Centre cell of an odd-sized matrix holds the largest value.
    if (n & 1) {
        arr[rings][rings] = n * n;
    }
}

// Merged-loop builder: for every ring a single loop writes one cell on each
// of the four edges per iteration. The top and right edges are indexed by an
// ascending position, the bottom and left edges by a descending one.
void build_2a(int n)
{
    const int rings = n / 2u;
    int value = 1;  // value for the current top-edge cell
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells per edge
        const int last = n - 1 - ring;
        int fwd = ring;   // ascending index (top / right edges)
        int back = last;  // descending index (bottom / left edges)
        while (fwd < last) {
            arr[ring][fwd] = value;
            arr[fwd][last] = value + side;
            arr[last][back] = value + 2 * side;
            arr[back][ring] = value + 3 * side;
            ++value;
            ++fwd;
            --back;
        }
        // The loop advanced by one edge; skip the other three.
        value += 3 * side;
    }
    if (n & 1) {
        arr[rings][rings] = n * n;
    }
}

// Merged-loop builder, variant b: like build_2a it writes four cells per
// iteration, but all four are addressed with the same ascending position.
// The left and bottom edges therefore receive values from a second counter
// that counts down from the last value of the ring.
void build_2b(int n)
{
    const int rings = n / 2u;
    int up = 1;  // ascending value for the top edge
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;
        const int last = n - 1 - ring;
        int down = up + 4 * side - 1;  // last value of this ring
        int pos = ring;
        while (pos < last) {
            arr[ring][pos] = up;
            arr[pos][last] = up + side;
            arr[pos + 1][ring] = down;
            arr[last][pos + 1] = down - side;
            ++up;
            --down;
            ++pos;
        }
        // Jump over the three edges the ascending counter did not walk.
        up += 3 * side;
    }
    if (n & 1) {
        arr[rings][rings] = n * n;
    }
}

// Merged-loop builder, variant c: the four corners of each ring are written
// first, then one loop fills the interior of all four edges using a single
// ascending position. Top/right take an ascending counter, left/bottom a
// descending one.
void build_2c(int n)
{
    const int rings = n / 2u;
    int up = 1;  // value of the ring's top-left corner on loop entry
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;
        const int last = n - 1 - ring;

        // Corners: top-left, top-right, bottom-right, bottom-left.
        arr[ring][ring] = up;
        arr[ring][last] = up + side;
        arr[last][last] = up + 2 * side;
        arr[last][ring] = up + 3 * side;
        ++up;

        // 'up' is already past the corner, hence -2 instead of -1 here.
        int down = up + 4 * side - 2;
        int pos = ring + 1;
        while (pos < last) {
            arr[ring][pos] = up;
            arr[pos][last] = up + side;
            arr[pos][ring] = down;
            arr[last][pos] = down - side;
            ++up;
            --down;
            ++pos;
        }
        up += 3 * side;
    }
    if (n & 1) {
        arr[rings][rings] = n * n;
    }
}

// Direct-formula builder: visits arr in row-major order and computes every
// cell from its coordinates alone. With start(k) = 4*k*(n-k) + 1:
//  - cells with row <= col use ring k = min(row, n-1-col) and count forward
//    from start(k);
//  - the remaining cells use k = min(col, n-1-row) + 1 and count backward
//    from start(k).
void build_3a(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            if (row > col) {
                const int next = std::min(col, n - 1 - row) + 1;
                const int nextStart = 4 * next * (n - next) + 1;
                arr[row][col] = nextStart - (row + col - (next - 1) * 2);
            } else {
                const int ring = std::min(row, n - 1 - col);
                const int ringStart = 4 * ring * (n - ring) + 1;
                arr[row][col] = ringStart + (row + col - ring * 2);
            }
        }
    }
}

// Direct-formula builder, variant b: same formulas as build_3a, but each row
// is split at the diagonal into two loops so the inner loops carry no branch.
// Columns [0, row) count backward from the k-th ring start with
// k = min(col, n-1-row) + 1; columns [row, n) count forward from the start of
// ring min(row, n-1-col).
void build_3b(int n)
{
    for (int row = 0; row < n; ++row) {
        int col = 0;

        // Left of the diagonal.
        for (; col < row; ++col) {
            const int next = std::min(col, n - 1 - row) + 1;
            const int nextStart = 4 * next * (n - next) + 1;
            arr[row][col] = nextStart - (row + col - (next - 1) * 2);
        }

        // Diagonal and everything to its right.
        for (; col < n; ++col) {
            const int ring = std::min(row, n - 1 - col);
            const int ringStart = 4 * ring * (n - ring) + 1;
            arr[row][col] = ringStart + (row + col - ring * 2);
        }
    }
}

// Row-splitting builder: fills arr row by row. Row 0 is 1..n. Every later row
// is written as three consecutive column segments:
//   1. leading cells  = cell directly above minus 1,
//   2. a consecutive run whose first value comes from a closed formula
//      (ascending in the upper half, descending in the lower half),
//   3. trailing cells = cell directly above plus 1.
// A single column index runs through all three segments, so each segment
// starts exactly where the previous one stopped.
void build_4(int n)
{
    for (int col = 0; col < n; ++col) {
        arr[0][col] = col + 1;
    }

    const int half = (n + 1) / 2u;

    // Upper half: the middle segment ascends.
    for (int row = 1; row < half; ++row) {
        int col = 0;
        while (col + 1 < row) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }

        int value = 4 * row * (n - row);  // first value of the ascending run
        const int runEnd = n - row;
        while (col < runEnd) {
            arr[row][col] = value++;
            ++col;
        }

        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }

    // Lower half: the middle segment descends.
    for (int row = half; row < n; ++row) {
        int col = 0;
        const int ring = n - 1 - row;  // ring whose bottom edge lies in this row
        while (col < ring) {
            arr[row][col] = arr[row - 1][col] - 1;
            ++col;
        }

        // First value of the descending run: ring start plus three edge lengths.
        int value = 4 * ring * (n - ring) + 1 + 3 * (n - 1 - 2 * ring);
        while (col <= row) {
            arr[row][col] = value--;
            ++col;
        }

        while (col < n) {
            arr[row][col] = arr[row - 1][col] + 1;
            ++col;
        }
    }
}

// Writes the top-left n x n corner of arr to standard output: every value is
// printed with a minimum field width of 3 followed by one space, each row
// ends with a newline, and one extra blank line follows the matrix.
void print(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            std::cout.width(3);  // width applies to the next insertion only
            std::cout << arr[row][col] << " ";
        }
        std::cout << "\n";
    }
    std::cout << "\n";
}

// One benchmark entry: a display label paired with the builder it refers to.
// Remains a plain aggregate so entries can be written as {"label", function}.
struct Func {
    // Signature shared by all builders: takes the matrix side length.
    typedef void (*Builder)(int);

    const char *name;  // label printed next to the timings
    Builder func;      // builder to invoke
};

// Runs the builders in pf either as a visual check or as a benchmark.
//
//   pf    - array of benchmark entries
//   len   - number of entries in pf
//   n     - matrix side length handed to every builder
//   count - number of timing passes over all entries; a negative value
//           selects demo mode: each builder runs once and its matrix is
//           printed, with no timing
//   M     - how many times a builder is called inside one timed sample
//
// In benchmark mode one line "n name ticks" is printed per sample, followed by
// a "Result" section with the summed ticks per entry divided by M * count.
// Times are raw clock() ticks. Nothing is summarised when M * count <= 0.
void test(Func pf[], size_t len, int n, int count = 1, int M = 1)
{
    if (count < 0) {
        for (size_t k = 0; k < len; ++k) {
            std::cout << pf[k].name << " :\n";
            pf[k].func(n);
            print(n);
        }
        return;
    }

    std::vector<size_t> ticks(len, 0);

    // Untimed fill before measuring -- presumably a warm-up so the first
    // timed builder does not pay for first-touching the buffer.
    basic_build(n);

    for (int pass = 0; pass < count; ++pass) {
        for (size_t i = 0; i < len; ++i) {
            const std::clock_t begin = std::clock();
            for (int rep = 0; rep < M; ++rep) pf[i].func(n);
            const std::clock_t elapsed = std::clock() - begin;
            // Stream output instead of printf: no header included by this file
            // declares printf, and "%ld" only matches clock_t where it is long.
            std::cout << n << ' ' << pf[i].name << ' '
                      << static_cast<long>(elapsed) << '\n';
            ticks[i] += elapsed;
        }
    }

    const int total = M * count;
    if (total <= 0) return;  // nothing was measured; avoid dividing by zero

    std::cout << "\nResult: " << n << "\n";
    for (size_t k = 0; k < len; ++k) {
        std::cout << pf[k].name << " " << ticks[k] / total << "\n";
    }
    std::cout << "\n";
}

int main()
{
SYSTEM_INFO info;
GetSystemInfo(&info);
if (info.dwNumberOfProcessors >= 2)
SetProcessAffinityMask( GetCurrentProcess(),2);

Func pf[]={
{"build_1a", build_1a},
{"build_1b", build_1b},
{"build_2a", build_2a},
{"build_2b", build_2b},
{"build_2c", build_2c},
{"build_3a", build_3a},
{"build_3b", build_3b},
{"build_4 ", build_4},
{"basic ", basic_build},
};

const size_t sz = sizeof(pf)/sizeof(pf[0]);
//test(pf, sz, 5, -1);
//test(pf, sz, N, 5);
test(pf, sz, N, 1, 5);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: