View Single Post
Old 4th September 2017, 03:43   #13  |  Link
StainlessS
HeartlessS Usurer
 
StainlessS's Avatar
 
Join Date: Dec 2009
Location: Over the rainbow
Posts: 10,980
Quote:
Originally Posted by StainlessS View Post
I would not be too eager to do a 64-bit TWriteAVI; I don't know the VirtualDub code that well (I ain't a CPP progger) and have no idea how it would perform if compiled for x64. (EDIT: I'm on XP32 and so could not test it)
Just set up VS 2008, and attempted to compile 64 bit version, no go I'm afraid.
There are (I think) 3 files containing Intel assembler (which I don't speak),
1) cpuaccel.cpp. CPU capability detector.
2) misc.cpp. Two MulDiv type functions, signed and unsigned.
3) FastWriteStream.cpp. Fast file writing functions, with some kind of thread switching/locking (which I also don't have much of a clue about).

Given that I also cannot test anything, I cannot proceed any further, sorry again.

EDIT:
cpuaccel.cpp
Code:
// CPU capability flags, defined with C linkage.
// NOTE(review): nothing in the visible code reads or writes these; presumably
// they are set elsewhere from the result of CPUCheckForExtensions() -- confirm.
extern "C" {
	bool FPU_enabled, MMX_enabled, ISSE_enabled;
};

// This is ridiculous.

// Tries to execute one SSE instruction under a structured exception handler.
// If the attempt raises STATUS_ILLEGAL_INSTRUCTION, the SSE and SSE2 bits are
// cleared from g_lCPUExtensionsAvailable. Any other exception code is
// swallowed without changing the flags.
//
// Returns the (possibly updated) value of g_lCPUExtensionsAvailable.
static long CPUCheckForSSESupport() {
	__try {
//		__asm andps xmm0,xmm0

		// The three bytes below (0F 54 C0) are the encoding of the instruction
		// named in the commented-out line above, emitted as raw opcode bytes.
		// NOTE(review): presumably done so the file builds with an assembler
		// that lacks the SSE mnemonics -- confirm.
		__asm _emit 0x0f
		__asm _emit 0x54
		__asm _emit 0xc0

	} __except(EXCEPTION_EXECUTE_HANDLER) {
		// Only an illegal-instruction fault is taken as "SSE is not usable".
		if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
			g_lCPUExtensionsAvailable &= ~(CPUF_SUPPORTS_SSE|CPUF_SUPPORTS_SSE2);
	}

	return g_lCPUExtensionsAvailable;
}

// Detects CPU features with the CPUID instruction and returns them as a bit
// mask; the same mask is stored in g_lCPUExtensionsAvailable.
//
// Bits produced by the code below:
//   0x03  set as soon as CPUID is found to exist
//         (NOTE(review): meaning of the two individual bits is not visible
//         here; presumably the matching CPUF_* flags -- confirm)
//   0x04  MMX                      (CPUID leaf 1, EDX bit 23)
//   0x18  SSE, two bits together   (CPUID leaf 1, EDX bit 25)
//   0x20  SSE2                     (CPUID leaf 1, EDX bit 26)
//   0x40  3DNow!                   (CPUID leaf 80000001h, EDX bit 31)
//   0x80  3DNow!2                  (CPUID leaf 80000001h, EDX bit 30)
//   0x08  AMD MMX extensions       (CPUID leaf 80000001h, EDX bit 22)
//
// Returns 0 when the EFLAGS ID bit cannot be set (no CPUID), and 3 when CPUID
// exists but leaf 0 reports no further leaves. When bit 0x10 or 0x20 is set,
// CPUCheckForSSESupport() is called and whatever it leaves in EAX becomes this
// function's return value.
//
// The function is declared naked, so it saves/restores EBP, EDI, ESI and EBX
// itself and ends with an explicit RET. It is 32-bit x86 only.
long __declspec(naked) CPUCheckForExtensions() {
	__asm {
		push	ebp
		push	edi
		push	esi
		push	ebx

		// EBP accumulates the feature mask for the whole routine.
		xor		ebp,ebp			;cpu flags - if we don't have CPUID, we probably
								;won't want to try FPU optimizations.

		;check for CPUID.

		// Try to set EFLAGS bit 21 and read it back; if it does not stick,
		// jump to done with an empty mask.
		pushfd					;flags -> EAX
		pop		eax
		or		eax,00200000h	;set the ID bit
		push	eax				;EAX -> flags
		popfd
		pushfd					;flags -> EAX
		pop		eax
		and		eax,00200000h	;ID bit set?
		jz		done			;nope...

		;CPUID exists, check for features register.

		// Leaf 0 returns the highest supported standard leaf in EAX.
		mov		ebp,00000003h
		xor		eax,eax
		cpuid
		or		eax,eax
		jz		done			;no features register?!?

		;features register exists, look for MMX, SSE, SSE2.

		mov		eax,1
		cpuid
		mov		ebx,edx
		and		ebx,00800000h	;MMX is bit 23
		shr		ebx,21
		or		ebp,ebx			;set bit 2 if MMX exists

		// EBX keeps a copy of EDX for the SSE2 test; EDX is consumed here.
		// shr+neg turns the single SSE bit into an all-ones/all-zeros mask.
		mov		ebx,edx
		and		edx,02000000h	;SSE is bit 25
		shr		edx,25
		neg		edx
		and		edx,00000018h	;set bits 3 and 4 if SSE exists
		or		ebp,edx

		and		ebx,04000000h	;SSE2 is bit 26
		shr		ebx,21
		and		ebx,00000020h	;set bit 5
		or		ebp,ebx

		;check for vendor feature register (K6/Athlon).

		// Leaf 80000000h returns the highest extended leaf in EAX; skip the
		// vendor checks when it is below 80000001h.
		mov		eax,80000000h
		cpuid
		mov		ecx,80000001h
		cmp		eax,ecx
		jb		done

		;vendor feature register exists, look for 3DNow! and Athlon extensions

		mov		eax,ecx
		cpuid

		// EAX and EDX both hold the extended feature word from here on, so
		// each one can be masked destructively.
		mov		eax,edx
		and		edx,80000000h	;3DNow! is bit 31
		shr		edx,25
		or		ebp,edx			;set bit 6

		mov		edx,eax
		and		eax,40000000h	;3DNow!2 is bit 30
		shr		eax,23
		or		ebp,eax			;set bit 7

		// Bit 22 shifted right by 19 lands on bit 3 of the mask.
		and		edx,00400000h	;AMD MMX extensions (integer SSE) is bit 22
		shr		edx,19
		or		ebp,edx

done:
		mov		eax,ebp
		mov		g_lCPUExtensionsAvailable, ebp

		;Full SSE and SSE-2 require OS support for the xmm* registers.

		// Only probe when mask bit 4 or 5 is set; the callee's return value
		// in EAX replaces the mask computed above.
		test	eax,00000030h
		jz		nocheck
		call	CPUCheckForSSESupport
nocheck:
		pop		ebx
		pop		esi
		pop		edi
		pop		ebp
		ret
	}
}
misc.cpp
Code:
// Signed multiply-then-divide: returns a * b / c, using the 64-bit product
// in EDX:EAX as the dividend so the multiplication itself cannot overflow.
// The quotient is truncated toward zero (IDIV semantics).
//
// Naked function: there is no prologue, so the arguments are read directly
// from [esp+4], [esp+8] and [esp+12]. The plain RET leaves the arguments on
// the stack for the caller to remove.
// The CPU raises a divide error if c is 0 or the quotient does not fit in
// 32 bits. 32-bit x86 only.
long __declspec(naked) MulDivTrunc(long a, long b, long c) {
	__asm {
		// eax = a
		mov eax,[esp+4]
		// edx:eax = a * b (signed, 64-bit result)
		imul dword ptr [esp+8]
		// eax = edx:eax / c (signed); remainder in edx is discarded
		idiv dword ptr [esp+12]
		ret
	}
}

// Unsigned multiply-then-divide with rounding: returns (a * b + c / 2) / c,
// using the 64-bit product in EDX:EAX as the dividend. Adding half the
// divisor before dividing rounds the quotient to the nearest integer.
//
// Naked __stdcall function: the arguments are read directly from [esp+4],
// [esp+8] and [esp+12], and "ret 12" removes the three 4-byte arguments from
// the stack on return.
// The CPU raises a divide error if c is 0 or the quotient does not fit in
// 32 bits. 32-bit x86 only.
unsigned __declspec(naked) __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
	__asm {
		// eax = a, ecx = c
		mov		eax,[esp+4]
		mov		ecx,[esp+12]
		// edx:eax = a * b (unsigned, 64-bit result)
		mul		dword ptr [esp+8]
		// edx:eax += c / 2, carrying into the high half
		shr		ecx,1
		add		eax,ecx
		adc		edx,0
		// eax = edx:eax / c (unsigned)
		div		dword ptr [esp+12]
		ret		12
	}
}
FastWriteStream.cpp, two chunks of code something like this
Code:
		if (lWritePointer >= lBufferSize)
			lWritePointer -= lBufferSize;

		// atomic add

		__asm mov eax,this
		__asm mov ebx,buffree
		__asm lock add [eax]FastWriteStream.lDataPoint,ebx

		// Signal the background thread if there might be enough to write.
		//
		// There's a chance that the background thread flips in here and
		// takes out the data before we get to this point.  But if that
		// happens, well, it just did what we wanted it to anyway, so why
		// bother signalling it?

		if (lDataPoint >= lChunkSize)
			SetEvent(hEventOkRead);
	}

...

	// Atomically update data point and signal write thread

	__asm mov eax,this
	__asm mov ebx,len
	__asm lock sub [eax]FastWriteStream.lDataPoint,ebx

	SetEvent(hEventOkWrite);
EDIT: Solved the misc.cpp problem as follows (the exact same code is in Vapoursynth, taken from the original VirtualDub source)
Code:
// MulDivTrunc / MulDivUnsigned
//
// MulDivTrunc(a, b, c)    returns a * b / c, signed, truncated toward zero.
// MulDivUnsigned(a, b, c) returns (a * b + c / 2) / c, unsigned, i.e. the
//                         quotient rounded to the nearest integer.
//
// Both use a 64-bit intermediate product so the multiplication does not
// overflow. c must be non-zero. On 32-bit x86 Windows builds the hand-written
// assembly versions are used; every other target (including x64, where MSVC
// has no inline assembler) gets the portable versions, which compute the
// same values.
#if defined(WIN32) && defined(_M_IX86)

	long __declspec(naked) MulDivTrunc(long a, long b, long c) {
		__asm {
			// Naked function: arguments are addressed relative to ESP.
			mov eax,[esp+4]
			// edx:eax = a * b (signed)
			imul dword ptr [esp+8]
			// eax = edx:eax / c (signed, truncating)
			idiv dword ptr [esp+12]
			ret
		}
	}

	unsigned __declspec(naked) __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
		__asm {
			mov		eax,[esp+4]
			mov		ecx,[esp+12]
			// edx:eax = a * b (unsigned)
			mul		dword ptr [esp+8]
			// edx:eax += c / 2 so the division rounds to nearest
			shr		ecx,1
			add		eax,ecx
			adc		edx,0
			div		dword ptr [esp+12]
			// __stdcall: callee removes the three 4-byte arguments
			ret		12
		}
	}

#else

	long MulDivTrunc(long a, long b, long c) {
		// C integer division truncates toward zero, matching IDIV.
		return (long)(((long long)a * b) / c);
	}

	unsigned __stdcall MulDivUnsigned(unsigned a, unsigned b, unsigned c) {
		// Add half the divisor before dividing, exactly as the assembly
		// version does, so both branches return the same rounded quotient.
		return (unsigned)(((unsigned long long)a * b + (c >> 1)) / c);
	}

#endif
__________________
I sometimes post sober.
StainlessS@MediaFire ::: AND/OR ::: StainlessS@SendSpace

"Some infinities are bigger than other infinities", but how many of them are infinitely bigger ???

Last edited by StainlessS; 5th September 2017 at 18:17.
StainlessS is offline   Reply With Quote