Sorry, your browser cannot access this site
This page requires browser support (enable) JavaScript
Learn more >

实现

bool是一个字节
但是vector实际上是用一个比特来表示一个bool

因此

vector慢(实际测试中:访问vector x次,访问vector 2*x次,但是访问vector的时间却是访问vector的7倍)
虽然O3优化可以加速,但是开O3优化后可能出问题
且for(auto&v:vector)等对于vector元素的引用会出现问题

不如bitset(编译期知道长度,定长,一个比特表示一个bool)或bool*(变量长度,定长,一个字节表示一个数组)或vector

性能对比:vector, vector, bool*

perfomance部分是20次的平均,初始化bool*数组用的是原始的for loop

结果:bool*最好

-O3编译

bool*最快
vector初始化比bool*快
vector慢麻了

vector<bool>
[alloc_and_init] 38624 us
[ave performance] 33919797 us

vector<char>
[alloc_and_init] 327476 us
[ave performance] 5525479 us

bool*
[alloc_and_init] 2190725 us
[ave performance] 3875693 us

-O3编译

vector和bool*性能类似,vector初始化比bool*慢
vector慢麻了

vector<bool>
[alloc_and_init] 54526 us
[ave performance] 1864292 us

vector<char>
[alloc_and_init] 406340 us
[ave performance] 741159 us

bool*
[alloc_and_init] 149163 us
[ave performance] 769090 us

代码

编译

g++ -std=c++17 -O3 -g -Wall -rdynamic main.cpp -o main

代码

#include <iostream>
#include <fstream>
#include <vector>
#include <chrono>

using namespace std;
typedef chrono::microseconds UnitType;
chrono::_V2::system_clock::time_point t_start, t_end;
UnitType d;
#define TIMING_START t_start = chrono::system_clock::now();
#define TIMING_END(str)                                   \
    t_end = chrono::system_clock::now();                  \
    d = chrono::duration_cast<UnitType>(t_end - t_start); \
    cout << str << d.count() << " us" << endl;

void moving_average(double &old_ave, double new_val, int &cnt)
{
    ++cnt;
    double tmp1 = old_ave * ((double(cnt - 1)) / cnt);
    double tmp2 = new_val / cnt;
    old_ave = tmp1 + tmp2;
}

// vector<bool>, vector<char>, bool*
template <typename Arr>
void benchmark_bool_array(Arr arr, size_t sz, int repeat_times, ostream &absorb_stdout)
{
    double ave = 0;
    int cnt = 0;
    for (int i = 0; i < repeat_times; ++i)
    {
        std::cout << "rnd " << i;
        TIMING_START
        for (size_t j = 0; j < sz; ++j)
        {
            if (j % 2)
                arr[j] = 1;
            else
                arr[j] = 0;
        }
        bool or_total = 0;
        for (size_t j = 0; j < sz; ++j)
        {
            if (arr[j])
                or_total = 1;
        }
        TIMING_END(" ")
        absorb_stdout << or_total;
        moving_average(ave, d.count(), cnt);
    }
    cout << "[ave performance] " << (int64_t)ave << " us" << endl;
}
void compare_bool_array(ostream &absorb_stdout)
{
    size_t sz = 1000000000; // 10^9
    int repeat_times = 20;
    cout << "vector<bool>" << endl;
    {
        TIMING_START
        vector<bool> arr(sz, 0);
        TIMING_END("[alloc_and_init] ")
        benchmark_bool_array<vector<bool>>(std::move(arr), sz, repeat_times, absorb_stdout);
    }

    cout << "\n\nvector<char>" << endl;
    {
        TIMING_START
        vector<char> arr(sz, 0);
        TIMING_END("[alloc_and_init] ")
        benchmark_bool_array<vector<char>>(std::move(arr), sz, repeat_times, absorb_stdout);
    }

    cout << "\n\nbool*" << endl;
    {
        TIMING_START
        bool *arr = new bool[sz];
        for (size_t j = 0; j < sz; ++j)
            arr[j] = 0;
        TIMING_END("[alloc_and_init] ")
        benchmark_bool_array<bool *>(arr, sz, repeat_times, absorb_stdout);
        delete[] arr;
    }
}

int main()
{
    ofstream absorb_stdout("tmp.txt");
    compare_bool_array(absorb_stdout);
    return 0;
}

bool*初始化对比

for loop和memset对比
perfomance部分是20次的平均

结果:memset更好

没有-O3

差异显著

[alloc] 6 us
for loop
[ave performance] 1847234 us

memset
[ave performance] 114449 us

-O3

仍有差异

[alloc] 5 us
for loop
[ave performance] 114162 us

memset
[ave performance] 150901 us

代码

工具函数等和上面的测评一样

void compare_bool_star_init()
{
    size_t sz = 1000000000; // 10^9
    int repeat_times = 20;
    TIMING_START
    bool *arr = new bool[sz];
    TIMING_END("[alloc] ")

    cout << "for loop" << endl;
    double ave = 0;
    int cnt = 0;
    for (int i = 0; i < repeat_times; ++i)
    {
        std::cout << "rnd " << i;
        TIMING_START
        for (size_t j = 0; j < sz; ++j)
            arr[j] = 0;
        TIMING_END(" ")
        moving_average(ave, d.count(), cnt);
    }
    cout << "[ave performance] " << (int64_t)ave << " us" << endl;

    cout << "\n\nmemset" << endl;
    ave = 0;
    cnt = 0;
    for (int i = 0; i < repeat_times; ++i)
    {
        std::cout << "rnd " << i;
        TIMING_START
        memset(arr, 0, sizeof(bool) * sz);
        TIMING_END(" ")
        moving_average(ave, d.count(), cnt);
    }
    cout << "[ave performance] " << (int64_t)ave << " us" << endl;

    delete[] arr;
}

int main()
{
    compare_bool_star_init();
    return 0;
}

评论