|
|
这是小弟写的 memset 替代函数，与官方 memset 对比测试快了约 4 倍，大家可以测试一下看看：
// Fills ulDataSize bytes starting at p with byValue.
//
// p          - destination buffer; a null pointer makes the call a no-op.
// byValue    - byte value written to every position.
// ulDataSize - number of bytes to fill; 0 makes the call a no-op.
//
// The range is filled as ulDataSize / 4 four-byte groups followed by the
// remaining ulDataSize % 4 bytes. The tail is written even when there are no
// full four-byte groups, so sizes 1..3 are handled correctly.
inline void memset32( void* p, unsigned char byValue, unsigned long ulDataSize )
{
    if( p == 0 || ulDataSize == 0 )
        return;

    unsigned long ulDwordSize = ulDataSize >> 2; // number of whole 4-byte groups
    unsigned long ulByteSize = ulDataSize & 3;   // leftover bytes after the groups

#if defined( _MSC_VER ) && defined( _M_IX86 )
    // 32-bit MSVC: use the x86 string-store instructions.
    // byValue replicated into all four bytes of a 32-bit word.
    unsigned long ulValue = byValue
        | ( ( unsigned long )byValue << 8 )
        | ( ( unsigned long )byValue << 16 )
        | ( ( unsigned long )byValue << 24 );
    __asm
    {
        mov edi, p
        mov eax, ulValue
        mov ecx, ulDwordSize
        rep stosd               // stores ecx dwords; does nothing when ecx == 0
        mov ecx, ulByteSize
        rep stosb               // edi already points past the dwords; al == byValue
    }
#else
    // Portable fallback: plain byte stores, so no alignment or aliasing
    // requirements and no pointer-to-integer casts.
    unsigned char* pDst = static_cast< unsigned char* >( p );
    for( ; ulDwordSize != 0; --ulDwordSize, pDst += 4 )
    {
        pDst[0] = byValue;
        pDst[1] = byValue;
        pDst[2] = byValue;
        pDst[3] = byValue;
    }
    for( ; ulByteSize != 0; --ulByteSize )
    {
        *pDst++ = byValue;
    }
#endif
}
这是测试填充65539字节的代码:
float time0 = 0;
float time1 = 0;
int iForCount = 1;
const long NUMBER = 65539;
//unsigned char a[NUMBER];
unsigned char* a = NULL;
if( !New( &a, NUMBER ) )return false;
time0 = TimeSystem.GetTime();
for( int i = 0; i < iForCount; i++ )
{
memset32( a, 255, NUMBER );
}
time1 = TimeSystem.GetTime();
COUT<<T("新算法:")<<endl;
COUT<<T("耗时: ")<<ValueFormatToStr( time1 - time0, -20, 10 )<<endl<<endl;
time0 = TimeSystem.GetTime();
for( i = 0; i < iForCount; i++ )
{
::memset( a, 255, NUMBER );
}
time1 = TimeSystem.GetTime();
COUT<<T("传统算法:")<<endl;
COUT<<T("耗时: ")<<ValueFormatToStr( time1 - time0, -20, 10 )<<endl<<endl;
ReleasePointer( a );
附上截图 |
|