Log in

View Full Version : question about mpeg2dec code


jfcarbel
24th September 2003, 05:38
I am looking at the mpeg2dec3 source and am having trouble finding where the actual decode method that is eventually used by getFrame() is located. Can someone point me to the correct source file for the code for the Decode() method.

// Points the `out` plane descriptor at a caller-supplied buffer and asks the
// decoder for frame n.
//
// The buffer is addressed as three consecutive planes: y at the start, u
// (pitch * pvi.height) bytes later, and v a further quarter of that size on.
//   n      - frame number handed to m_decoder.Decode().
//   buffer - destination memory for all three planes.
//   pitch  - row stride of the y plane; the u/v stride is set to half of it.
void MPEG2Source::GetFrame(int n, unsigned char *buffer, int pitch)
{
    // Work out where each plane starts.
    unsigned char *const luma = buffer;
    unsigned char *const chroma_u = luma + (pitch * pvi.height);
    unsigned char *const chroma_v = chroma_u + ((pitch * pvi.height)/4);

    out->y = luma;
    out->u = chroma_u;
    out->v = chroma_v;

    out->ypitch = pitch;
    out->uvpitch = pitch/2;

    m_decoder.Decode(n, out);

    // Executes the emms instruction before returning
    // (presumably because the decoder uses MMX -- confirm).
    __asm emms;
}

where CMPEG2Decoder m_decoder.

int 21h
24th September 2003, 06:08
I think this is it.. it's been a while since I've messed around in this code.


// Produces the picture for display frame `frame` in `dst` (row stride `pitch`).
//
// Steps, as visible in this function:
//   1. Map `frame` through FrameList[] to a picture number and a field mode `fo`.
//   2. Find the GOP (index `now`) in GOPList[] whose number range contains it.
//   3. If that GOP is not the cached one (VF_GOPNow):
//        - if the request directly follows the previous one (VF_OldFrame + 1),
//          decode the next picture(s) from the current stream position, drop the
//          GOP cache and return;
//        - otherwise seek to the GOP start, decode forward into GOPBuffer[].
//      If it is the cached GOP, decode forward only as far as still needed.
//   4. Assemble the output from the fields stored in GOPBuffer[].
//
// NOTE(review): a later post in this thread calls this version "the old broken
// one"; treat it as reference material rather than known-good code.
void CMPEG2Decoder::Decode(unsigned char *dst, DWORD frame, int pitch)
{
DWORD i, now, size, origin, ref, fo;
int remain;

// Alias for this object; the VF_* members below are reached through it.
CMPEG2Decoder* in = this;

// FILM mode: always take the top entry and use field mode 0.
if (FO_Flag==FO_FILM)
{
fo = 0;
frame = FrameList[frame]->top;
}

// `origin` is remembered as VF_OldFrame; note that in FILM mode it is the
// already remapped value, otherwise it is the frame number as requested.
origin = frame;

if (FO_Flag!=FO_FILM)
{
// fo = 0: top and bottom entries are equal.
// fo = 1: top < bottom, continue with the top entry.
// fo = 2: top > bottom, continue with the bottom entry.
if (FrameList[frame]->top == FrameList[frame]->bottom)
{
fo = 0;
frame = FrameList[frame]->top;
}
else if (FrameList[frame]->top < FrameList[frame]->bottom)
{
fo = 1;
frame = FrameList[frame]->top;
}
else
{
fo = 2;
frame = FrameList[frame]->bottom;
}
}

// `ref` becomes the position of the picture relative to the start of its GOP.
ref = frame;

// Last GOP: its extent is bounded by VF_FrameBound instead of a following entry.
if (frame >= GOPList[in->VF_GOPLimit-1]->number)
{
now = in->VF_GOPLimit-1;
ref -= GOPList[in->VF_GOPLimit-1]->number;
size = in->VF_FrameBound - GOPList[in->VF_GOPLimit-1]->number + 1;
}
else
// Linear scan for the GOP whose [number, next number) range contains `frame`.
// NOTE(review): if no entry matches, `size` stays uninitialized and `ref` is
// not rebased, yet both are used below -- confirm this cannot happen.
for (now = 0; now < (in->VF_GOPLimit-1); now++)
{
if (frame>=GOPList[now]->number && frame<GOPList[now+1]->number)
{
ref -= GOPList[now]->number;
size = GOPList[now+1]->number - GOPList[now]->number + 1;
break;
}
}

// For fo 1 and 2 the output also reads GOPBuffer[ref-1] (see the final switch),
// so the index is moved one picture forward.
if (fo)
ref ++;

// The requested GOP is not the one currently cached.
if (now != in->VF_GOPNow)
{
// Request directly follows the previous one: keep decoding from the current
// stream position, with no seek and no GOP cache.
if ((in->VF_OldFrame + 1)==origin)
{
if (Full_Frame)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);

// Not a frame picture: a second header/picture is decoded as well.
if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);
}
}
else
// Copyodd/Copyeven argument order is presumably (source, destination, pitch,
// flag); the flag is 1 wherever `dst` is the source and 0 wherever `dst` is the
// destination -- TODO confirm against their definitions.
switch (fo)
{
case 0:
if (!FrameList[origin]->backward)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);
}

// Keep one field of the new picture in dstFrame; Field_Order selects which.
if (FrameList[origin]->forward)
{
if (Field_Order)
Copyodd(dst, dstFrame, pitch, 1);
else
Copyeven(dst, dstFrame, pitch, 1);
}
}
else
{
// No decode here: both fields come from the picture held in dstFrame.
Copyodd(dstFrame, dst, pitch, 0);
Copyeven(dstFrame, dst, pitch, 0);
}
break;

case 1:
// Odd field from the picture already in dstFrame, even field from the picture
// decoded next into dstFrame.
Copyodd(dstFrame, dst, pitch, 0);

Get_Hdr();
Decode_Picture(1, dstFrame, DSTBYTES);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dstFrame, DSTBYTES);
}

Copyeven(dstFrame, dst, pitch, 0);
break;

case 2:
// Mirror of case 1: even field from the held picture, odd field from the next.
Copyeven(dstFrame, dst, pitch, 0);

Get_Hdr();
Decode_Picture(1, dstFrame, DSTBYTES);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dstFrame, DSTBYTES);
}

Copyodd(dstFrame, dst, pitch, 0);
break;
}

// Release every cached GOP picture.
if (in->VF_GOPSize)
{
for (i=0; i < in->VF_GOPSize; i++)
free(GOPBuffer[i]);

in->VF_GOPSize = 0;
}

// VF_GOPLimit is one past the largest `now` computed above, so the next call
// takes the `now != VF_GOPNow` branch again.
in->VF_GOPNow = in->VF_GOPLimit;
in->VF_OldFrame = origin;
return;
}

// Non-sequential request: seek to the start of the GOP and decode forward.
remain = ref;
in->VF_OldRef = ref;
in->VF_GOPNow = now;
Second_Field = 0;

// Resize the cache to `size` entries: free the surplus or allocate the shortfall.
// NOTE(review): the malloc result is not checked.
if (size < in->VF_GOPSize)
for (i=0; i < (in->VF_GOPSize - size); i++)
free(GOPBuffer[size+i]);
else if (size > in->VF_GOPSize)
for (i=0; i < (size - in->VF_GOPSize); i++)
GOPBuffer[in->VF_GOPSize+i] = reinterpret_cast<unsigned char*>(malloc(in->VF_FrameSize));

in->VF_GOPSize = size;

// Position the input file recorded for this GOP at the GOP's stored offset.
File_Flag = GOPList[now]->file;
_lseeki64(Infile[GOPList[now]->file], GOPList[now]->position, SEEK_SET);
Initialize_Buffer();

// Read headers until an I picture is found (or Get_Hdr() returns 0).
while (Get_Hdr() && picture_coding_type!=I_TYPE);

// ref argument 0: decode without writing an output picture (see Decode_Picture).
Decode_Picture(0, dst, pitch);

// Read headers past any B pictures that follow.
while (Get_Hdr() && picture_coding_type==B_TYPE);

if (picture_structure!=FRAME_PICTURE)
{
Decode_Picture(0, dst, pitch);
Get_Hdr();
}

Decode_Picture(1, dst, pitch);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);
}

// First output picture of the GOP goes into cache slot 0.
Copyodd(dst, GOPBuffer[0], pitch, 1);
Copyeven(dst, GOPBuffer[0], pitch, 1);

// Decode forward, filling cache slots 1..ref.
while (remain && Get_Hdr())
{
Decode_Picture(1, dst, pitch);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);
}

Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);

remain--;
}

// Near the end of the GOP, also keep the last decoded picture in dstFrame,
// which the sequential branch above reads from.
if (!Full_Frame && ref>=(size-2))
{
Copyodd(dst, dstFrame, pitch, 1);
Copyeven(dst, dstFrame, pitch, 1);
}
}
else
{
// Same GOP as the cached one: only pictures beyond VF_OldRef need decoding.
// `remain` is signed, so a request at or before VF_OldRef decodes nothing and
// is served from GOPBuffer[] below.
remain = ref - in->VF_OldRef;

if (remain > 0)
{
in->VF_OldRef = ref;

while (remain && Get_Hdr())
{
Decode_Picture(1, dst, pitch);

if (picture_structure!=FRAME_PICTURE)
{
Get_Hdr();
Decode_Picture(1, dst, pitch);
}

Copyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
Copyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);

remain--;
}

if (!Full_Frame && ref>=(size-2))
{
Copyodd(dst, dstFrame, pitch, 1);
Copyeven(dst, dstFrame, pitch, 1);
}
}
}

// Build the output from cached pictures:
//   fo 0 - both fields from slot ref;
//   fo 1 - odd field from slot ref-1, even field from slot ref;
//   fo 2 - odd field from slot ref, even field from slot ref-1.
switch (fo)
{
case 0:
Copyodd(GOPBuffer[ref], dst, pitch, 0);
Copyeven(GOPBuffer[ref], dst, pitch, 0);
break;

case 1:
Copyodd(GOPBuffer[ref-1], dst, pitch, 0);
Copyeven(GOPBuffer[ref], dst, pitch, 0);
break;

case 2:
Copyodd(GOPBuffer[ref], dst, pitch, 0);
Copyeven(GOPBuffer[ref-1], dst, pitch, 0);
break;
}

// Remembered so the next call can detect a sequential request.
in->VF_OldFrame = origin;
}

// Decodes the picture whose header has just been read and optionally writes an
// output picture.
//   ref   - when zero, the picture is decoded but nothing is written to dst.
//   dst   - destination passed to assembleFrame().
//   pitch - row stride passed to assembleFrame().
void CMPEG2Decoder::Decode_Picture(int ref, unsigned char *dst, int pitch)
{
// A frame picture arriving while Second_Field is set clears the flag.
if (picture_structure==FRAME_PICTURE && Second_Field)
Second_Field = 0;

// For non-B pictures, shift the pf_* values: backward becomes forward and
// current becomes backward.
if (picture_coding_type!=B_TYPE)
{
pf_forward = pf_backward;
pf_backward = pf_current;
}

Update_Picture_Buffers();

picture_data();

// Output only when requested, and only for a frame picture or when
// Second_Field is set at this point.
if (ref && (picture_structure==FRAME_PICTURE || Second_Field))
{
// B pictures are written from auxframe/pf_current; all other types are written
// from forward_reference_frame/pf_forward (presumably the previous reference
// picture, to restore display order -- TODO confirm).
if (picture_coding_type==B_TYPE)
assembleFrame(auxframe, pf_current, dst, pitch);
else
assembleFrame(forward_reference_frame, pf_forward, dst, pitch);
}

// For anything other than a frame picture, flip Second_Field on every call.
if (picture_structure!=FRAME_PICTURE)
Second_Field = !Second_Field;
}

jfcarbel
24th September 2003, 23:32
Can someone explain at a high level the basics of what this method is doing? Since it accepts a frame as its parameter, I assume that it is doing some kind of search for the frame in the stream. Is this correct? Just wondering what all this logic is and why it is so much longer than Decode_Picture().

Also, I saw some code implemented in a Delphi program that used the DVD2AVI code to create an MPEG-2 player, and it actually played the streams as smoothly as the original, unlike DVD2AVI's preview.

It looked like this was accomplished by writing their own getFrame method rather than using the one published in VFAPI.cpp. The method looked like this:

// Decodes the next picture in the stream and returns the frame buffer.
// Returns NULL when Get_Hdr() reports no further header, or when
// EStopPlaying is thrown while reading or decoding.
DLLExport(pbyte) GetMPEG2Frame()
{
    try {
        // No further header: nothing to decode.
        if (!Get_Hdr())
            return NULL;

        Decode_Picture();
        return FrameBuffer;
    }
    catch(EStopPlaying) { }  // deliberately swallowed; falls through to NULL

    return NULL;
}


What the programmer did was create a play thread with an instance of the mpeg2 decoder inside it and just kept calling his GetMPEG2Frame(). Is this the correct way to use the DVD2AVI code to play an mpeg2 stream? His call of Get_Hdr() is probably inefficient; he should probably write his own Get_PictureHdr() and call that, so that it only breaks when it finds the actual picture.

Nic
25th September 2003, 16:39
"Can someone explain at a high level the basics of what this method is doing"

It is doing as you expect. The GOPList is basically an array of the lines of GOP information from the D2V File. It does search to the correct LBA when the frame number differs from what is expected.
Also, all the logic in there is for proper reconstruction of the frames from the fields as they are decoded (including repeated fields, top field first, etc.).

DVD2AVI's preview just plays the file as quickly as it can and doesn't try to be smooth. If you want to play it smoothly then normally you need to just display the frame at the correct intervals rather than as fast as possible (i.e. pay attention to the PTS values).

Not sure if that's much use, but it is at least a description :)

-Nic

jfcarbel
25th September 2003, 19:35
Thanks Nic, I think I understand. So the Decode method is searching for a frame by using the d2v file if it has it and if not doing a binary search?

So if all I want to do is just stream the decoded frames sequentially, then a decode_picture is just needed - correct?

"all the logic in there is for proper re-construction of the frames from the fields as they are decoded" Wouldn't I also need this for just playing the video as well, or is this just needed for the modes like Force Film?

So to sum up, was this person correct to use decode_picture only and never call decode?

BTW- I understand about the frame rate now, I found code in this Delphi program that times the frame display so that the correct fps gets rendered.

Guest
26th September 2003, 04:04
Originally posted by int 21h
I think this is it.. its been awhile since I've messed around in this code. That's the old broken one. :)

Guest
26th September 2003, 04:14
Originally posted by jfcarbel
So the Decode method is searching for a frame by using the d2v file if it has it and is not doing a binary search? MPEG2DEC must have the D2V file! Yes, it builds a table in memory from the D2V file and uses that for (a) random access navigation (seeking), (b) 3:2 pulldown (RFF handling), (c) force film processing, and (d) swap fields (which doesn't swap fields in the usual sense, but reverses the field dominance!).

"So if all I want to do is just stream the decoded frames sequentially, then a decode_picture is just needed - correct?" Not if you want to handle RFF, i.e., do 3:2 pulldown or force film. And you simply can't avoid it for NTSC. Look at the code for my MPEG2DEC3dg version. It is simpler than this one and has comments (not to mention that it is the only version that *correctly* implements random access). The key to understanding the code is to understand the FrameList[] table that is built in memory from the D2V file; all follows from that.

Just in case you are wondering what would be the harm in doing it your way with an NTSC stream, here's the answer. If it is a pure 3:2 pulldown clip, it will be OK; you'll have in effect performed an IVTC by ignoring the RFF flags. You'd need to adjust the frame rate and count properly, of course. But few NTSC streams are pure 3:2 pulldown. For clips that are not pure, you will destroy AV sync and be unable to set a valid, rational frame rate.

Wouldn't I also need this for just playing the video as well or is this just needed for the modes like Force Film? See above.

So to sum up, was this person correct to use decode_picture only and never call decode? For *linear decode* of streams *with no pulldown*, it is adequate. For proper playback, you'd need to pace the display as Nic described.

jfcarbel
26th September 2003, 06:46
Thanks neuron, I forgot all about this complex stuff, so I went back to school and reviewed "THE" guide - http://www.doom9.hu/ivtc-tut.htm

So because of the telecine process used for NTSC, we must pay attention to the flags when decoding otherwise we miss frames that are inserted to keep the frame rate at 29.97 and thus lose audio sync.

Most of the examples I looked at were video only.

So the correct way to decode is to use the showFrame() method that is published via the VFAPI header, since this calls the CMPEG2Decoder::Decode and properly decodes the mpeg2 stream.

However, if FORCE FILM is set in DVD2AVI then will the showFrame() method only output 24fps? And if so how is the audio kept in sync since our frame rate is no longer 29.97 NTSC? This part still confuses me.

Guest
26th September 2003, 15:06
Originally posted by jfcarbel
So because of the telecine process used for NTSC, we must pay attention to the flags when decoding otherwise we miss frames that are inserted to keep the frame rate at 29.97 and thus lose audio sync. Almost. Two points: (1) extra fields are inserted, not frames. (2) Sync problems occur only when parts of the stream are 24fps (telecined) and others are at 30fps. If the stream is pure telecine material, and you ignore the RFF flags and set the frame rate and count appropriately, you will not lose sync. The problem arises when you have 24fps and 30fps parts. If you ignore all RFFs and set the frame rate to 24fps, then the 30fps sections will play at the wrong speed. That is why the force film process actually decimates the 30fps sections. To know which are the 30fps sections, and thus when to decimate, you have to pay attention to the RFF flags.

So the correct way to decode is to use the showFrame() method that is published via the VFAPI header, since this calls the CMPEG2Decoder::Decode and properly decodes the mpeg2 stream. You can use VFAPI if you wish, but you can also implement your own solution. The point is that to be correct you need to honor the RFF flags.

"However, if FORCE FILM is set in DVD2AVI then will the showFrame() method only output 24fps? And if so how is the audio kept in sync since our frame rate is no longer 29.97 NTSC? This part still confuses me." For pure (non-hybrid 24/30fps) streams, as long as the frame rate is correct, the time lengths of the audio and video will be correct and there will be sync. The problem arises for mixed streams because setting a 24fps rate for 30fps sections makes those sections longer in time. That is why they are decimated when force film is enabled.

So, when force film is enabled, yes, the frame rate is set back to 24fps. The telecined parts are treated by just ignoring their RFF flags. The non-telecined parts are decimated (i.e., they have 1 in every 5 frames removed) to bring them to 24fps. This results in jerkiness for the non-telecined parts.

But if you are thinking of just using force film for everything that is a really bad idea. I'll give just one reason because it is covered well elsewhere in the forum: Many NTSC films are "hard-telecined", i.e., they have 3:2 pulldown applied before MPEG encoding and thus have no RFF flags set. Applying force film to such streams produces total garbage.

The bottom line is that you need to honor the RFF flags properly. Yes, it is tricky, but I have published working, commented code that you are welcome to use.

Finally, if your goal is only linear playback, you can pick up the TFF/RFF flags as you parse the stream, you do not need to generate an index table. DVD2AVI/MPEG2DEC3 uses an index table to support random access.

jfcarbel
23rd April 2005, 10:55
Finally, if your goal is only linear playback, you can pick up the TFF/RFF flags as you parse the stream, you do not need to generate an index table. DVD2AVI/MPEG2DEC3 uses an index table to support random access.
Its been awhile since I have been able to continue looking into this project. But I want to revisit this discussion. But I think I need to make my goals clear this time.

My project only applies to DVD VOB streams and not Plain Old MPEG-2 streams.

I intend for 2 things:
1) random and high speed scanning of a DVDs VOB (video only)
2) playback via DirectShow since this will provide seamless playback with audio of the VOB stream

So you can see I want random access to the video, but in addition when the user clicks on the preview (play) button, then it will start a seamless linear playback from that point both audio and video. So I am looking to meet both goals. I want to create an interface similar to what you see with DVDShrink. The question is how.

Some technical points:
I now know that I can meet goal 1 by building an index by parsing IFO files and getting the LBAs for the start of each GOP (I-Frame) from the VOBU_ADMAP. A VTS Title can consist of several VOB files, so I can look at how DGMPGDec (DVD2AVI) does seamless LBA jumps from one file to the next.

Correct me if I am wrong, but I am assuming I can use the DGMPGDec code to support this high speed scanning, but I need to modify it to use the I-frame index I built using the IFO VOBU_ADMAP. Not sure how difficult this would be or where to start looking in the code.

I then need to be able, at any point while scanning the video in DGMPGDec, to get back the current LBA it is on. I need this so that when the preview button is selected, I can tell DirectShow to jump to the LBA and begin playing.

A few issues/questions:

How do I jump to an LBA using DirectShow to give it a starting point to play the VOB files?

I believe that you cannot tell DirectShows DVD interface to play a set of VOB files like DGMPGDec does. So not sure how I would code using DirectShow to play a Title sets VOB files seamlessly. Any suggestions?

DGMPGDec does not support resizing of the video window, it is fixed to 720x480. I have no idea how to modify this, can someone assist with this or Don can you implement this in a future version? Or maybe I could use something else out there that supports scanning by LBA (maybe VLC player?).

I would like the control to be able to be usable from Delphi. The core code and video window can be written in C++ but be a component (ActiveX maybe) but the scroll bar controlling the video should be in Delphi sending messages to the control. And the resizing of the window in Delphi passing messages to the component. Not sure how easy it would be to componentize DGMPGDec. But this would be a nice feature since many tools are written in Delphi.

I don't believe that DVD2AVI supports individual frame stepping like DVDShrink preview does.

Guest
23rd April 2005, 14:52
Originally posted by jfcarbel
Correct me if I am wrong, but I am assuming I can use the DGMPGDec code to support this high speed scanning, but I need to modify it to use the I-frame index I built using the IFO VOBU_ADMAP. Just knowing the I frame LBAs is not enough to support random frame access. Yes, you can figure out what else you need by examining DGIndex/DGDecode. But I doubt whether you'd want to modify them per se, rather than starting a new base, because you are specifying that your architecture will use DirectShow and LBAs, whereas DGIndex/DGDecode uses a built-in decoder and absolute file byte offsets (to sequence or picture headers).

How do I jump to an LBA using DirectShow to give it a starting point to play the VOB files?

I believe that you cannot tell DirectShows DVD interface to play a set of VOB files like DGMPGDec does. So not sure how I would code using DirectShow to play a Title sets VOB files seamlessly. Any suggestions? I don't know anything about that DirectShow stuff. Just that it sucks for random access.

DGMPGDec does not support resizing of the video window, it is fixed to 720x480. I have no idea how to modify this, can someone assist with this or Don can you implement this in a future version? There is some resizing code in there that is used to downsize large HD video. But can't you use DirectShow functionality?

"I don't believe that DVD2AVI supports individual frame stepping like DVDShrink preview does." DGIndex does not, but DGDecode does. That's the whole raison d'etre of DGDecode!

jfcarbel
23rd April 2005, 16:52
Just knowing the I frame LBAs is not enough to support random frame access.
Well, maybe I don't need random frame access. I am just looking to be able to scroll quickly thru the movie from the start using LBAs. I do not need to for example, jump to frame #77. DVDShrink says he accomplishes this by scanning the IFO for each start of GOP. Could I be wrong in interpreting what he said?

However, I do want to implement a feature called bookmarks. Say a user quickly scans to his favorite scene and may even use the frame arrow keys to step forward frame by frame after using the scroll bar to quickly get near his scene. I assume the LBA only brings me to the I-Frame, but since the user used the frame advance arrows as well, maybe I do need random "frame" access for this.

Although DVDShrink did have a reauthor mode where he allowed cutting, even on a frame by frame basis. So he must have figured a way to mark where he was at for the cut points.

I don't know anything about that DirectShow stuff. Just that it sucks for random access.
Correct, you can only scan by putting the video in pause mode first, there is no live seamless scanning thru the video. Hence the 2 phase approach in my GUI, where I will use my own I-Frame index and Decoder for the scanning and then when the preview button is pressed, I jump to that LBA for DirectShow and start playing it. I assume this is how DVDShrink does it since he commented that he uses his own decoder, but also says he uses DirectShow for the previews.

There is some resizing code in there that is used to downsize large HD video. But can't you use DirectShow functionality?
If I do that then how will I accomplish what I mentioned above, that is using the DGDecode for my quick frame scanning and jumping to DirectShow only for preview. Won't I need to tell DGDecode how to resize if I am using it for the video scanning?

DGIndex does not, but DGDecode does. That's the whole raison d'etre of DGDecode!
Of course its a frame server. I just mentioned this because I saw in the DGMPGDec Development list under Enhancements 10) (GUI) Frame accuracy. Individual frame stepping (both directions).
Is the Enhancement list at the top of this thread actual items that have been completed or future enhancements?

Guest
23rd April 2005, 17:16
Originally posted by jfcarbel
Well, maybe I don't need random frame access. I am just looking to be able to scroll quickly thru the movie from the start using LBAs. I do not need to for example, jump to frame #77. DVDShrink says he accomplishes this by scanning the IFO for each start of GOP. Could I be wrong in interpreting what he said? If all you need is GOP granularity, then, yes, that is a lot easier. You ought to use the DVDShrink model rather than the DGMPGDec model in that case.

However, I do want to implement a feature called bookmarks. Say a user quickly scans to his favorite scene and may even use the frame arrow keys to step forward frame by frame after using the scroll bar to quickly get near his scene. I am assume the LBA only brings me to the I-Frame, but since the user used the frame advance arrows as well, maybe I do need random "frame" access for this. You can do that with full random access support, or use your proposed method and store the location as I frame plus N forward frame steps. To know the N, the user would have had to have located to the frame by stepping and your code counts the steps and saves that if a bookmark is selected there.

Although DVDShrink did have a reauthor mode where he allowed cutting, even on a frame by frame basis. So he must have figured a way to mark where he was at for the cut points. I don't know what DVDShrink is doing internally, but to cut arbitrarily requires some re-encoding due to the lost reference frame problem.

If I do that then how will I accomplish what I mentioned above, that is using the DGDecode for my quick frame scanning and jumping to DirectShow only for preview. Won't I need to tell DGDecode how to resize if I am using it for the video scanning? I said I didn't think DGDecode was suitable for this purpose.

Is the Enhancement list at the top of this thread actual items that have been completed or future enhancements? That's work to be done.

jfcarbel
23rd April 2005, 17:59
Not sure how easy it would be to componentize DGMPGDec. But this would be a nice feature since many tools are written in Delphi.
Don, do you think this could be added to the development list as a future enhancement? That is wrapping the DGMPGDec Decode video window into an ActiveX component. So all the core logic would be in the component and your GUI just sends messages to it and responds to events.

I know that the actual decoder is a separate DLL so that other tools may use just it. But maybe its not that difficult to compile a DLL into a ActiveX control so its packaged with the ActiveX control.

I know there are alot of good books on COM (ActiveX) building. I think its really just a interface wrapper around the code and a special way to compile it.

This way you could focus on the core functions and others could pretty up the GUI or write their own or better yet incorporate the DGMPGDec video window right in their app. For example, it looks like Gordian Knot uses WMP for the preview, but could use your video window to show the preview. Since Delphi can use ActiveX controls.