寄存器變量的存取速度比普通變量快。對于C程序,寄存器變量不能取地址,編譯器會報錯。對于C++程序,可以對寄存器變量進行取址操作,編譯器不會報錯,但是取出來的地址似乎不是寄存器地址,而是內存地址,不知道是不是C++編譯器在涉及取址運算時將寄存器變量自動轉換成普通變量來處理。
1、只有普通運算
對于上述的普通累加運算而言,采用普通變量耗時0.7177秒,采用寄存器變量耗時0.111秒,速度上確實有明顯的差別。
2、涉及取址運算
如果涉及取址運算,采用普通變量耗時0.7867秒,采用寄存器變量耗時0.4792秒,速度上的差別就沒有那么顯著了。大家可以發現兩種變量取出的地址分別是0x6ffe38和0x6ffe3c,是連續的兩個地址,那都是內存地址。不能確定,是不是C++編譯器在涉及取址運算時自動將寄存器變量當成普通變量來處理。
實際使用時,底層硬件環境的實際情況對寄存器變量的使用會有一些限制。每個函數中只有很少的變量可以保存在寄存器中,且只允許某些類型的變量。但是,過量的寄存器聲明并沒有什么害處,這是因為編譯器可以忽略過量的或者不支持的寄存器變量聲明。另外,無論寄存器變量實際上是不是存放在寄存器中,它的地址都是不能訪問的。在不同的機器中,對寄存器變量的數目和類型的具體限制也是不同的。 ——《C程序設計語言(第二版)》 Brian W. Kernighan & Dennis M. Ritchie
對于C程序,寄存器變量是不能取址的:
幾種線程本地存儲變量和普通變量的性能比較
God一直致力于研究高并發服務端的開發,這次要優化的是libGod庫中的線程本地存儲變量,線程本地存儲變量訪問非常頻繁,優化后庫的性能應該會提高不少。已知的線程本地存儲方法有boost中的thread_specific_ptr類,gcc中的__thread關鍵字,pthread中的pthread_getspecific函數。這次測試這3種本地存儲以及普通變量之間的性能差別,代碼如下:
#include <pthread.h>
#include <stdio.h>
#include <sys/time.h>

#include <iostream>

#include <boost/thread/thread.hpp>
#include <boost/thread/tss.hpp>
using namespace std;
/// Instrumented payload object stored in each thread-local slot under test.
///
/// Construction and destruction each print one line containing the tag value,
/// so the program output shows when an instance is created and when (or
/// whether) it is destroyed.
class C {
public:
    /// Stores the tag @p a and prints "C() <a>".
    C(int a) : m_a(a) {
        printf("C() %d\n", m_a);
    }

    /// Prints "~C() <tag>" using the tag given at construction.
    ~C() {
        printf("~C() %d\n", m_a);
    }

private:
    int m_a;  ///< Tag echoed by the constructor and destructor messages.
};
// Compile-time selector for the storage mechanism being benchmarked:
//   1     -> boost::thread_specific_ptr<C>
//   2     -> GCC __thread pointer
//   3     -> pthread key (pthread_setspecific / pthread_getspecific)
//   other -> ordinary global pointer
// `pc` is the slot read by the benchmark; `testType` is the label printed
// in the result banner.
#define TM 3
#if TM == 1
boost::thread_specific_ptr<C> pc;
const char *testType = "boost";
#elif TM == 2
__thread C *pc;
const char *testType = "__thread";
#elif TM == 3
pthread_key_t pc;
const char *testType = "pthread";
#else
C *pc;
const char *testType = "normal";
#endif
void boostthreadFunc() {
#if TM == 1
pc.reset(new C(10));
#elif TM == 2
pc = new C(20);
#elif TM == 3
if (pthread_key_create(&pc, NULL)) {
cout 《《 “pthread_key_create” 《《 endl;
return;
}
if (pthread_setspecific(pc, new C(30))) {
cout 《《 “pthread_setspecific” 《《 endl;
return;
}
#else
pc = new C(20);
#endif
int switches = 5000000;
int i = switches;
struct timeval tm_start, tm_end;
gettimeofday(&tm_start, NULL);
while (i--) {
#if TM == 1
C *c1 = pc.get();
C *c2 = pc.get();
C *c3 = pc.get();
C *c4 = pc.get();
C *c5 = pc.get();
C *c6 = pc.get();
C *c7 = pc.get();
C *c8 = pc.get();
C *c9 = pc.get();
C *c10 = pc.get();
C *c11 = pc.get();
C *c12 = pc.get();
C *c13 = pc.get();
C *c14 = pc.get();
C *c15 = pc.get();
C *c16 = pc.get();
C *c17 = pc.get();
C *c18 = pc.get();
C *c19 = pc.get();
C *c20 = pc.get();
C *c21 = pc.get();
C *c22 = pc.get();
C *c23 = pc.get();
C *c24 = pc.get();
C *c25 = pc.get();
C *c26 = pc.get();
C *c27 = pc.get();
C *c28 = pc.get();
C *c29 = pc.get();
C *c30 = pc.get();
C *c31 = pc.get();
C *c32 = pc.get();
C *c33 = pc.get();
C *c34 = pc.get();
C *c35 = pc.get();
C *c36 = pc.get();
C *c37 = pc.get();
C *c38 = pc.get();
C *c39 = pc.get();
C *c40 = pc.get();
#elif TM == 2
C *c1 = pc;
C *c2 = pc;
C *c3 = pc;
C *c4 = pc;
C *c5 = pc;
C *c6 = pc;
C *c7 = pc;
C *c8 = pc;
C *c9 = pc;
C *c10 = pc;
C *c11 = pc;
C *c12 = pc;
C *c13 = pc;
C *c14 = pc;
C *c15 = pc;
C *c16 = pc;
C *c17 = pc;
C *c18 = pc;
C *c19 = pc;
C *c20 = pc;
C *c21 = pc;
C *c22 = pc;
C *c23 = pc;
C *c24 = pc;
C *c25 = pc;
C *c26 = pc;
C *c27 = pc;
C *c28 = pc;
C *c29 = pc;
C *c30 = pc;
C *c31 = pc;
C *c32 = pc;
C *c33 = pc;
C *c34 = pc;
C *c35 = pc;
C *c36 = pc;
C *c37 = pc;
C *c38 = pc;
C *c39 = pc;
C *c40 = pc;
#elif TM == 3
C *c1 = (C *)pthread_getspecific(pc);
C *c2 = (C *)pthread_getspecific(pc);
C *c3 = (C *)pthread_getspecific(pc);
C *c4 = (C *)pthread_getspecific(pc);
C *c5 = (C *)pthread_getspecific(pc);
C *c6 = (C *)pthread_getspecific(pc);
C *c7 = (C *)pthread_getspecific(pc);
C *c8 = (C *)pthread_getspecific(pc);
C *c9 = (C *)pthread_getspecific(pc);
C *c10 = (C *)pthread_getspecific(pc);
C *c11 = (C *)pthread_getspecific(pc);
C *c12 = (C *)pthread_getspecific(pc);
C *c13 = (C *)pthread_getspecific(pc);
C *c14 = (C *)pthread_getspecific(pc);
C *c15 = (C *)pthread_getspecific(pc);
C *c16 = (C *)pthread_getspecific(pc);
C *c17 = (C *)pthread_getspecific(pc);
C *c18 = (C *)pthread_getspecific(pc);
C *c19 = (C *)pthread_getspecific(pc);
C *c20 = (C *)pthread_getspecific(pc);
C *c21 = (C *)pthread_getspecific(pc);
C *c22 = (C *)pthread_getspecific(pc);
C *c23 = (C *)pthread_getspecific(pc);
C *c24 = (C *)pthread_getspecific(pc);
C *c25 = (C *)pthread_getspecific(pc);
C *c26 = (C *)pthread_getspecific(pc);
C *c27 = (C *)pthread_getspecific(pc);
C *c28 = (C *)pthread_getspecific(pc);
C *c29 = (C *)pthread_getspecific(pc);
C *c30 = (C *)pthread_getspecific(pc);
C *c31 = (C *)pthread_getspecific(pc);
C *c32 = (C *)pthread_getspecific(pc);
C *c33 = (C *)pthread_getspecific(pc);
C *c34 = (C *)pthread_getspecific(pc);
C *c35 = (C *)pthread_getspecific(pc);
C *c36 = (C *)pthread_getspecific(pc);
C *c37 = (C *)pthread_getspecific(pc);
C *c38 = (C *)pthread_getspecific(pc);
C *c39 = (C *)pthread_getspecific(pc);
C *c40 = (C *)pthread_getspecific(pc);
#else
C *c1 = pc;
C *c2 = pc;
C *c3 = pc;
C *c4 = pc;
C *c5 = pc;
C *c6 = pc;
C *c7 = pc;
C *c8 = pc;
C *c9 = pc;
C *c10 = pc;
C *c11 = pc;
C *c12 = pc;
C *c13 = pc;
C *c14 = pc;
C *c15 = pc;
C *c16 = pc;
C *c17 = pc;
C *c18 = pc;
C *c19 = pc;
C *c20 = pc;
C *c21 = pc;
C *c22 = pc;
C *c23 = pc;
C *c24 = pc;
C *c25 = pc;
C *c26 = pc;
C *c27 = pc;
C *c28 = pc;
C *c29 = pc;
C *c30 = pc;
C *c31 = pc;
C *c32 = pc;
C *c33 = pc;
C *c34 = pc;
C *c35 = pc;
C *c36 = pc;
C *c37 = pc;
C *c38 = pc;
C *c39 = pc;
C *c40 = pc;
#endif
}
gettimeofday(&tm_end, NULL);
switches *= 40;
long long ns = (tm_end.tv_sec - tm_start.tv_sec) * 1000LL * 1000LL * 1000LL +
(tm_end.tv_usec - tm_start.tv_usec) * 1000LL;
std::cout 《《 “####Benchmark result#### ” 《《 testType 《《 std::endl;
std::cout 《《 “Totol switches : ” 《《 switches 《《 std::endl;
std::cout 《《 “Cost per switch(ns) : ” 《《 (double)ns/switches 《《 std::endl;
std::cout 《《 “All cost switch(ns) : ” 《《 ns 《《 std::endl;
std::cout 《《 “####Benchmark result####” 《《 std::endl;
}
/// Runs the benchmark on a dedicated boost::thread, waits for it to finish,
/// then prints a final message and exits with status 0.
int main() {
    boost::thread bt(&boostthreadFunc);
    bt.join();  // block until the benchmark thread has returned
    printf("main exit.。\n");
    return 0;
}
評論
查看更多