实现
bool是一个字节
但是vector
因此
vector
虽然O3优化可以加速,但是开O3优化后可能出问题
且for(auto&v:vector
不如bitset(编译期知道长度,定长,一个比特表示一个bool)或bool*(变量长度,定长,一个字节表示一个数组)或vector
性能对比:vector, vector, bool*
perfomance部分是20次的平均,初始化bool*数组用的是原始的for loop
结果:bool*最好
无-O3
编译
bool*最快
vector
vector
vector<bool>
[alloc_and_init] 38624 us
[ave performance] 33919797 us
vector<char>
[alloc_and_init] 327476 us
[ave performance] 5525479 us
bool*
[alloc_and_init] 2190725 us
[ave performance] 3875693 us
-O3
编译
vector
vector
vector<bool>
[alloc_and_init] 54526 us
[ave performance] 1864292 us
vector<char>
[alloc_and_init] 406340 us
[ave performance] 741159 us
bool*
[alloc_and_init] 149163 us
[ave performance] 769090 us
代码
编译
g++ -std=c++17 -O3 -g -Wall -rdynamic main.cpp -o main
代码
#include <iostream>
#include <fstream>
#include <vector>
#include <chrono>
using namespace std;
typedef chrono::microseconds UnitType;
chrono::_V2::system_clock::time_point t_start, t_end;
UnitType d;
#define TIMING_START t_start = chrono::system_clock::now();
#define TIMING_END(str) \
t_end = chrono::system_clock::now(); \
d = chrono::duration_cast<UnitType>(t_end - t_start); \
cout << str << d.count() << " us" << endl;
void moving_average(double &old_ave, double new_val, int &cnt)
{
++cnt;
double tmp1 = old_ave * ((double(cnt - 1)) / cnt);
double tmp2 = new_val / cnt;
old_ave = tmp1 + tmp2;
}
// vector<bool>, vector<char>, bool*
template <typename Arr>
void benchmark_bool_array(Arr arr, size_t sz, int repeat_times, ostream &absorb_stdout)
{
double ave = 0;
int cnt = 0;
for (int i = 0; i < repeat_times; ++i)
{
std::cout << "rnd " << i;
TIMING_START
for (size_t j = 0; j < sz; ++j)
{
if (j % 2)
arr[j] = 1;
else
arr[j] = 0;
}
bool or_total = 0;
for (size_t j = 0; j < sz; ++j)
{
if (arr[j])
or_total = 1;
}
TIMING_END(" ")
absorb_stdout << or_total;
moving_average(ave, d.count(), cnt);
}
cout << "[ave performance] " << (int64_t)ave << " us" << endl;
}
void compare_bool_array(ostream &absorb_stdout)
{
size_t sz = 1000000000; // 10^9
int repeat_times = 20;
cout << "vector<bool>" << endl;
{
TIMING_START
vector<bool> arr(sz, 0);
TIMING_END("[alloc_and_init] ")
benchmark_bool_array<vector<bool>>(std::move(arr), sz, repeat_times, absorb_stdout);
}
cout << "\n\nvector<char>" << endl;
{
TIMING_START
vector<char> arr(sz, 0);
TIMING_END("[alloc_and_init] ")
benchmark_bool_array<vector<char>>(std::move(arr), sz, repeat_times, absorb_stdout);
}
cout << "\n\nbool*" << endl;
{
TIMING_START
bool *arr = new bool[sz];
for (size_t j = 0; j < sz; ++j)
arr[j] = 0;
TIMING_END("[alloc_and_init] ")
benchmark_bool_array<bool *>(arr, sz, repeat_times, absorb_stdout);
delete[] arr;
}
}
int main()
{
ofstream absorb_stdout("tmp.txt");
compare_bool_array(absorb_stdout);
return 0;
}
bool*
初始化对比
for loop和memset对比
perfomance部分是20次的平均
结果:memset更好
没有-O3
差异显著
[alloc] 6 us
for loop
[ave performance] 1847234 us
memset
[ave performance] 114449 us
-O3
仍有差异
[alloc] 5 us
for loop
[ave performance] 114162 us
memset
[ave performance] 150901 us
代码
工具函数等和上面的测评一样
void compare_bool_star_init()
{
size_t sz = 1000000000; // 10^9
int repeat_times = 20;
TIMING_START
bool *arr = new bool[sz];
TIMING_END("[alloc] ")
cout << "for loop" << endl;
double ave = 0;
int cnt = 0;
for (int i = 0; i < repeat_times; ++i)
{
std::cout << "rnd " << i;
TIMING_START
for (size_t j = 0; j < sz; ++j)
arr[j] = 0;
TIMING_END(" ")
moving_average(ave, d.count(), cnt);
}
cout << "[ave performance] " << (int64_t)ave << " us" << endl;
cout << "\n\nmemset" << endl;
ave = 0;
cnt = 0;
for (int i = 0; i < repeat_times; ++i)
{
std::cout << "rnd " << i;
TIMING_START
memset(arr, 0, sizeof(bool) * sz);
TIMING_END(" ")
moving_average(ave, d.count(), cnt);
}
cout << "[ave performance] " << (int64_t)ave << " us" << endl;
delete[] arr;
}
int main()
{
compare_bool_star_init();
return 0;
}