Quote:
Originally Posted by StainlessS
I would not be too eager to do a 64-bit TWriteAVI; I don't know the VirtualDub code that well (I'm not a C++ programmer) and have no idea how it would perform if compiled for x64. (EDIT: I'm on XP32 and so could not test it.)
|
Just set up VS 2008 and attempted to compile a 64-bit version; no go, I'm afraid (the x64 Visual C++ compiler does not accept inline __asm).
There are (I think) 3 files containing Intel assembler (which I don't speak):
1) cpuaccel.cpp. CPU capability detector.
2) misc.cpp. Two MulDiv type functions, signed and unsigned.
3) FastWriteStream.cpp. Fast file writing functions, with some kind of thread switching/locking (which I also don't have much of a clue about).
Given that I also cannot test anything, I cannot proceed any further; sorry again.
EDIT:
cpuaccel.cpp
Code:
// Three boolean CPU-feature flags declared with C linkage (unmangled names).
// NOTE(review): nothing in this excerpt reads or writes them; presumably they
// are set elsewhere from the CPU detection result - confirm against the callers.
extern "C" {
bool FPU_enabled, MMX_enabled, ISSE_enabled;
};
// Probes whether an SSE instruction can actually be executed.
//
// The three emitted bytes (0F 54 C0) encode `andps xmm0,xmm0`; they are emitted
// raw rather than written as a mnemonic. If executing them raises an
// illegal-instruction exception, the SSE and SSE2 bits are cleared from
// g_lCPUExtensionsAvailable. Any other exception is swallowed without
// changing the flags.
//
// Returns the (possibly updated) value of g_lCPUExtensionsAvailable.
static long CPUCheckForSSESupport() {
    __try {
        __asm {
            _emit 0x0f
            _emit 0x54
            _emit 0xc0
        }
    } __except(EXCEPTION_EXECUTE_HANDLER) {
        const bool illegalOpcode = (STATUS_ILLEGAL_INSTRUCTION == _exception_code());

        if (illegalOpcode) {
            g_lCPUExtensionsAvailable &= ~(CPUF_SUPPORTS_SSE | CPUF_SUPPORTS_SSE2);
        }
    }

    return g_lCPUExtensionsAvailable;
}
// Detects CPU features with the CPUID instruction and returns them as a bit
// mask; the mask is also stored in g_lCPUExtensionsAvailable.
//
// Bits accumulated in EBP by the code below:
//   0x03  bits 0-1  set once the CPUID instruction is found to exist
//   0x04  bit 2     MMX                (CPUID leaf 1, EDX bit 23)
//   0x18  bits 3-4  SSE                (CPUID leaf 1, EDX bit 25)
//   0x20  bit 5     SSE2               (CPUID leaf 1, EDX bit 26)
//   0x40  bit 6     3DNow!             (CPUID leaf 80000001h, EDX bit 31)
//   0x80  bit 7     "3DNow!2"          (CPUID leaf 80000001h, EDX bit 30)
//   0x08  bit 3     also set alone by AMD MMX extensions / integer SSE
//                                      (CPUID leaf 80000001h, EDX bit 22)
// NOTE(review): how these bit positions map onto the CPUF_* constants is not
// visible in this excerpt - confirm against their definitions.
//
// If bit 0x10 or 0x20 ends up set, CPUCheckForSSESupport() is called and
// whatever it leaves in EAX becomes this function's return value.
//
// Naked function: the compiler emits no prologue/epilogue, so the code saves
// and restores EBP, EDI, ESI and EBX itself and returns with an explicit `ret`.
// 32-bit x86 MSVC inline assembly only.
long __declspec(naked) CPUCheckForExtensions() {
__asm {
push ebp
push edi
push esi
push ebx
xor ebp,ebp ;cpu flags - if we don't have CPUID, we probably
;won't want to try FPU optimizations.
;check for CPUID.
// Try to set EFLAGS bit 21 (the ID bit) and read it back; if it does not
// stick, CPUID is treated as unavailable and the result stays 0.
pushfd ;flags -> EAX
pop eax
or eax,00200000h ;set the ID bit
push eax ;EAX -> flags
popfd
pushfd ;flags -> EAX
pop eax
and eax,00200000h ;ID bit set?
jz done ;nope...
;CPUID exists, check for features register.
mov ebp,00000003h
// CPUID leaf 0 returns the highest supported standard leaf in EAX.
xor eax,eax
cpuid
or eax,eax
jz done ;no features register?!?
;features register exists, look for MMX, SSE, SSE2.
mov eax,1
cpuid
mov ebx,edx
and ebx,00800000h ;MMX is bit 23
// bit 23 -> bit 2
shr ebx,21
or ebp,ebx ;set bit 2 if MMX exists
mov ebx,edx
and edx,02000000h ;SSE is bit 25
shr edx,25
// EDX is now 0 or 1; negating gives 0 or 0xFFFFFFFF so the AND below yields
// either 0 or the full 0x18 mask.
neg edx
and edx,00000018h ;set bits 3 and 4 if SSE exists
or ebp,edx
and ebx,04000000h ;SSE2 is bit 26
// bit 26 -> bit 5
shr ebx,21
and ebx,00000020h ;set bit 5
or ebp,ebx
;check for vendor feature register (K6/Athlon).
// CPUID leaf 80000000h returns the highest supported extended leaf in EAX;
// skip the vendor checks if it is below 80000001h.
mov eax,80000000h
cpuid
mov ecx,80000001h
cmp eax,ecx
jb done
;vendor feature register exists, look for 3DNow! and Athlon extensions
mov eax,ecx
cpuid
// Keep a copy of the extended feature bits in EAX while EDX is masked.
mov eax,edx
and edx,80000000h ;3DNow! is bit 31
shr edx,25
or ebp,edx ;set bit 6
mov edx,eax
and eax,40000000h ;3DNow!2 is bit 30
shr eax,23
or ebp,eax ;set bit 7
and edx,00400000h ;AMD MMX extensions (integer SSE) is bit 22
// bit 22 -> bit 3
shr edx,19
or ebp,edx
done:
// Publish the accumulated mask; EAX carries the return value.
mov eax,ebp
mov g_lCPUExtensionsAvailable, ebp
;Full SSE and SSE-2 require OS support for the xmm* registers.
test eax,00000030h
jz nocheck
// The callee's return value in EAX replaces the mask computed above.
call CPUCheckForSSESupport
nocheck:
pop ebx
pop esi
pop edi
pop ebp
ret
}
}
misc.cpp
Code:
// Signed multiply-then-divide: returns (a * b) / c, using the 64-bit EDX:EAX
// product as the dividend so the multiplication itself cannot overflow.
//
// Naked function: no prologue/epilogue is generated, so the arguments are read
// directly relative to ESP on entry (a = [esp+4], b = [esp+8], c = [esp+12]).
// The plain `ret` leaves the arguments on the stack for the caller to remove.
// 32-bit x86 MSVC inline assembly only.
long __declspec(naked) MulDivTrunc(long a, long b, long c) {
__asm {
// EAX = a
mov eax,[esp+4]
// EDX:EAX = a * b (signed 64-bit product)
imul dword ptr [esp+8]
// EAX = EDX:EAX / c (signed; quotient truncated toward zero)
idiv dword ptr [esp+12]
ret
}
}
// Unsigned multiply-then-divide with rounding: returns (a * b + c/2) / c,
// using the 64-bit EDX:EAX product so the multiplication cannot overflow.
//
// Naked __stdcall function: arguments are read relative to ESP on entry
// (a = [esp+4], b = [esp+8], c = [esp+12]) and `ret 12` pops the three
// 4-byte arguments on return.
// 32-bit x86 MSVC inline assembly only.
unsigned __declspec(naked) __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
__asm {
// EAX = a, ECX = c
mov eax,[esp+4]
mov ecx,[esp+12]
// EDX:EAX = a * b (unsigned 64-bit product)
mul dword ptr [esp+8]
// ECX = c / 2, the rounding term
shr ecx,1
// 64-bit add of the rounding term: low half, then carry into the high half
add eax,ecx
adc edx,0
// EAX = EDX:EAX / c (unsigned)
div dword ptr [esp+12]
ret 12
}
}
FastWriteStream.cpp, two chunks of code something like this
Code:
if (lWritePointer >= lBufferSize)
lWritePointer -= lBufferSize;
// atomic add
__asm mov eax,this
__asm mov ebx,buffree
__asm lock add [eax]FastWriteStream.lDataPoint,ebx
// Signal the background thread if there might be enough to write.
//
// There's a chance that the background thread flips in here and
// takes out the data before we get to this point. But if that
// happens, well, it just did what we wanted it to anyway, so why
// bother signalling it?
if (lDataPoint >= lChunkSize)
SetEvent(hEventOkRead);
}
...
// Atomically update data point and signal write thread
__asm mov eax,this
__asm mov ebx,len
__asm lock sub [eax]FastWriteStream.lDataPoint,ebx
SetEvent(hEventOkWrite);
EDIT: Solved the misc.cpp problem as follows (the exact same code is in VapourSynth, taken from the original VirtualDub source):
Code:
// MulDivTrunc / MulDivUnsigned: 32-bit multiply-then-divide helpers that keep
// a 64-bit intermediate product so a * b cannot overflow before the division.
//
// 32-bit x86 MSVC builds use the hand-written naked routines; every other
// target (e.g. x64, where MSVC offers no inline assembler) uses the portable
// C++ versions in the #else branch. Both branches compute the same values for
// results that fit in 32 bits.
#if defined(WIN32) && defined(_M_IX86)

// Returns (a * b) / c, signed, quotient truncated toward zero.
// Naked cdecl function: arguments are read relative to ESP on entry
// (a = [esp+4], b = [esp+8], c = [esp+12]); the caller pops them.
long __declspec(naked) MulDivTrunc(long a, long b, long c) {
    __asm {
        mov eax,[esp+4]             // EAX = a
        imul dword ptr [esp+8]      // EDX:EAX = a * b (signed)
        idiv dword ptr [esp+12]     // EAX = EDX:EAX / c
        ret
    }
}

// Returns (a * b + c/2) / c, unsigned, i.e. the quotient rounded to nearest.
// Naked __stdcall function: `ret 12` pops the three 4-byte arguments.
unsigned __declspec(naked) __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
    __asm {
        mov eax,[esp+4]             // EAX = a
        mov ecx,[esp+12]            // ECX = c
        mul dword ptr [esp+8]       // EDX:EAX = a * b (unsigned)
        shr ecx,1                   // ECX = c / 2, the rounding term
        add eax,ecx                 // 64-bit add: low half ...
        adc edx,0                   // ... then carry into the high half
        div dword ptr [esp+12]      // EAX = EDX:EAX / c
        ret 12
    }
}

#else   // was "# elseif", which is not a preprocessor directive

// Portable equivalent of the assembly MulDivTrunc above.
// Built-in 64-bit types are used so no project typedef header is required.
long MulDivTrunc(long a, long b, long c) {
    return (long)(((long long)a * b) / c);
}

// Portable equivalent of the assembly MulDivUnsigned above.
// The rounding term is c/2, matching the `shr ecx,1` in the assembly version;
// a fixed 0x80000000 would add 2^31 to the dividend and skew every result.
// Unlike the assembly `div`, a quotient that does not fit in 32 bits is
// truncated here instead of raising a divide fault.
unsigned __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
    return (unsigned)(((unsigned long long)a * b + (c >> 1)) / c);
}

#endif