Welcome to Doom9's Forum, THE in-place to be for everyone interested in DVD conversion. Before you start posting please read the forum rules. By posting to this forum you agree to abide by the rules. |
14th June 2009, 20:10 | #24 | Link |
Registered User
Join Date: Oct 2007
Posts: 713
|
Code:
# MCBob v0.3:
# nnedi and nnedibob were made possible by tritical and the fellow Doom9 community who contributed CPU cycles.
# Another approach to motion compensated bobbing, built by Didée.
#
# ( Between-all-chairs version with some quick hacks )
# ( v0.3c: as stated above, but worse ;-) )
# ( v0.3u (unofficial): use new nnEDI interpolator by tritical, modded by Terranigma )
#
# Features:
#
# - No residual combing, due to STT (Shape Transposition Technology)
# - Works without thresholds (with adaptive thresholds instead of fixed ones)
# - Motion Search between fields of same parity, for maximum flicker/bob reduction in motion areas
# - Motion Masking adaptive to local complexity, for maximum flicker/bob reduction in static areas
# - spatial Interpolation overweights spatio-temporal interpolation
#   ( in areas where the information obtained from temporal neighbors in itself was only spatially
#     interpolated, use a mix of spatial and spatio-temporal interpolation )
# - error correction for temporal interpolation is fully self adaptive
#
# Prerequisites:
#
# - MVTools, preferably v1.4.13 (or newer)
# - MaskTools v2.0
# - nnEDI 1.3 +
# - RemoveGrain/Repair package
# - ReduceFlicker (if temp-NR for ME is used)
# - MedianBlur by tsp
#
# Parameters of MCBob (all optional except the clip):
#
#   clp        interlaced input clip; its parity (GetParity) decides the field order used throughout
#   EdiPre     bob to start with: 0.0 = dumb bob, 1.0 = nnEDI bob, in between = mix of both (default 1.0)
#   EdiPost    nnEDI post-processing: 0 = none, 1 = frame-sized, 2 = average of two field-based passes (default 2)
#   blocksize  block size for the motion search (default 16)
#   MEdepth    search effort of the motion search, passed as MVAnalyse searchparam (default 2)
#   sharpness  sharpening strength (default 0.7 when EdiPost==2, otherwise 1.0)
#   mtnmode    0 = same-parity motion check only, 1 = additional inter-parity check on vertical edges,
#              2 = additional inter-parity check everywhere except on horizontal edges (default 1)
#   mtnth1/2   lower/upper threshold (fraction of local min/max) for static vs. motion (defaults 0.20 / 0.40)
#   errth1/2   lower/upper threshold for motion-interpolation error detection (defaults 0.40 / 0.60)
#   MEspatNR   amount of spatial noise reduction, used for the motion search only (default 0.0)
#   MEtempNR   amount of temporal noise reduction, used for the motion search only (default 0.0)
#
# Returns the bobbed (double-rate) clip with its parity flag set to match the input's field order.

function MCBob(clip clp, float "EdiPre", int "EdiPost", int "blocksize", int "MEdepth", float "sharpness",
 \             int "mtnmode", float "mtnth1", float "mtnth2", float "errth1", float "errth2",
 \             float "MEspatNR", float "MEtempNR")
{
    EdiPre    = default( EdiPre,    1.0 )   # What bob to start with: 0.0 = dumbbob, 1.0 = nnEdiBob, inbetween = mix of both
    EdiPost   = default( EdiPost,   2   )   # 0 = no nnEDI PP / 1 = Framesized nnEdi PP / 2 = Average two Fieldbased nnEdi PP's
    bs        = default( blocksize, 16  )   # Blocksize for motion search
    me        = default( MEdepth,   2   )   # Search effort of motion search
    sharpness = (EdiPost==2)
     \        ? default( sharpness, 0.7 )
     \        : default( sharpness, 1.0 )   # use slight sharpening before STT routine
    mtnmode   = default( mtnmode,   1   )   # 0 = use only same-parity motion check, 1|2 use an additional
                                            # inter-parity check: 1 = on vertical edges / 2 = not on horizontal edges
    mtnth1    = default( mtnth1,   0.20 )   # below this %age of local min/max is static
    mtnth2    = default( mtnth2,   0.40 )   # above this %age of local min/max is motion
    errth1    = default( errth1,   0.40 )   # similar for error detection
    errth2    = default( errth2,   0.60 )   # of motion interpolation errors
    MEspatNR  = default( MEspatNR, 0.00 )   # amount of spatial NR (for motion search only)
    MEtempNR  = default( MEtempNR, 0.00 )   # amount of temporal NR (for motion search only)

    order = (clp.GetParity == True) ? 0 : 1
    ORDR  = (order==0) ? "TFF" : "BFF"
    ox    = clp.width()
    oy    = clp.height()

    # String forms of the numeric settings, spliced into the MaskTools RPN expressions below
    ERTH1 = string(errth1)
    ERTH2 = string(errth2)
    MNTH1 = string(mtnth1)
    MNTH2 = string(mtnth2)
    SSTR  = string(sharpness)

    # MVTools idx values: the search clip shares idx_1 with the bobbed clip only when no NR is applied
    idx_1 = 10
    idx_2 = (MEspatNR==0.0 && MEtempNR==0.0) ? idx_1 : idx_1+2
    idx_3 = idx_2 + 2


    # Create basic operations that we will work with
    # ==============================================

    # Basic Field & Bob clips
    # -----------------------
    flatbob     = clp.Bob(1,0)
    normbob     = clp.Bob(0.0,0.5)
    ofields     = clp.SeparateFields()
    oweave      = clp.DoubleWeave()
    nnedibobbed = clp.nnEDIbob()
    bobbed      = (EdiPre == 0.0) ? normbob
     \          : (EdiPre == 1.0) ? nnedibobbed
     \          :                   normbob.merge(nnedibobbed,EdiPre)

    # Mask to check if motion compensation has delivered only the neighbor's spatial interpolated part
    # ------------------------------------------------------------------------------------------------
    black    = Blankclip(ofields).mt_lut("0").Trim(1,1).Loop(Framecount(clp))
    white    = Blankclip(ofields).mt_lut("255").Trim(1,1).Loop(Framecount(clp))
    interpol = Interleave(black,white,white,black).AssumeFieldbased().AssumeParity(ORDR).Weave()

    # Vertical Edge mask, needed for more safe motion masking
    # -------------------------------------------------------
    Vedge  = bobbed.mt_Edge("1 0 -1 2 0 -2 1 0 -1",0,255,0,255,U=1,V=1)
    Vedge2 = Vedge.mt_Inpand(mode="vertical").mt_Inpand(mode="vertical").mt_Expand(mode="vertical").mt_Expand(mode="vertical")
    Vedge  = mt_Lutxy(Vedge,Vedge2,yexpr="y 2 - 2 * x > x y 2 - 2 * ?") #.mt_Expand()
    Hedge  = bobbed.mt_Edge("1 2 1 0 0 0 -1 -2 -1",0,255,0,255,U=1,V=1)
    Hedge  = Hedge.mt_logic(Hedge.temporalsoften(1,255,0,255,2),"max")

    # If requested, do flicker reduction before searching motion vectors
    # -------------------------------------------------------------------
    (MEspatNR==0.0) ? bobbed : bobbed.Merge(bobbed.minblur(2,uv=3),MEspatNR)
    (MEtempNR==0.0) ? last   : last.Merge(reduceflicker(2),MEtempNR)
    srch = last

    # Perform Motion Search
    # ---------------------
    lmbda = 128
    pnw   = 40
    bw_vec2 = srch.SelectEven().MVAnalyse(isb=true, truemotion=false,delta=1,lambda=lmbda,pel=2,searchparam=me,sharp=2,blksize=bs,overlap=1*bs/2,pnew=pnw,idx=idx_1)
    fw_vec2 = srch.SelectEven().MVAnalyse(isb=false,truemotion=false,delta=1,lambda=lmbda,pel=2,searchparam=me,sharp=2,blksize=bs,overlap=1*bs/2,pnew=pnw,idx=idx_1)
    bw_vec3 = srch.SelectOdd() .MVAnalyse(isb=true, truemotion=false,delta=1,lambda=lmbda,pel=2,searchparam=me,sharp=2,blksize=bs,overlap=1*bs/2,pnew=pnw,idx=idx_1+1)
    fw_vec3 = srch.SelectOdd() .MVAnalyse(isb=false,truemotion=false,delta=1,lambda=lmbda,pel=2,searchparam=me,sharp=2,blksize=bs,overlap=1*bs/2,pnew=pnw,idx=idx_1+1)

    # Create RAW motion interpolation
    # -------------------------------
    alt_1 = bobbed.SelectEven().MVFlowInter(bw_vec2,fw_vec2,time=50.0,thSCD1=64*18,thSCD2=227,idx=idx_2)
    alt_2 = bobbed.SelectOdd() .MVFlowInter(bw_vec3,fw_vec3,time=50.0,thSCD1=64*18,thSCD2=227,idx=idx_2+1).DuplicateFrame(0)
    alt   = Interleave(alt_2,alt_1)

    # Create motion interpolation of "nothing new" mask
    # -------------------------------------------------
    interpol_1    = interpol.SelectEven().MVFlowInter(bw_vec2,fw_vec2,time=50.0,thSCD1=64*8,thSCD2=127,idx=idx_3)
    interpol_2    = interpol.SelectOdd() .MVFlowInter(bw_vec3,fw_vec3,time=50.0,thSCD1=64*8,thSCD2=127,idx=idx_3+1).DuplicateFrame(0)
    interpol_comp = Interleave(interpol_2,interpol_1)
    nothing_new   = mt_lutxy(interpol,interpol_comp,"x y * 255 / 255 / 1 2 / ^ 160 *")


    # Error check of motion interpolation
    # ===================================
    # Errors that are neutralized by errors in direct vertical neighborhood are not considered, because bob-typical.
    # Remaining error is checked against [min,max] of local error to decide if it's valid or not.
    #
    # Build error mask, neutralize vertical-only errors
    # ---------------------------------------------------
    altD    = mt_Makediff(bobbed,alt,U=3,V=3)
    altDmin = altD.mt_Inpand(mode="vertical",U=3,V=3)
    altDmin = altDmin.mt_Deflate().mt_Merge(altDmin,Vedge,U=4,V=4)
    altDmax = altD.mt_Expand(mode="vertical",U=3,V=3)
    altDmax = altDmax.mt_Inflate().mt_Merge(altDmax,Vedge,U=4,V=4)
    altDmm  = mt_Lutxy(altDmax.mt_Expand(mode="horizontal",U=3,V=3),altDmin.mt_Inpand(mode="horizontal",U=3,V=3),"x y -",U=3,V=3)
    altDmm  = altDmm.mt_Inflate().mt_Merge(altDmm,Vedge,U=4,V=4)
    altD1   = altD .mt_Lutxy(altDmin,"x 128 - y 128 - * 0 < 128 x 128 - abs y 128 - abs < x y ? ?",U=3,V=3)
    altD1   = altD1.mt_Lutxy(altDmax,"x 128 - y 128 - * 0 < 128 x 128 - abs y 128 - abs < x y ? ?",U=3,V=3)
    altD2   = altD.Repair(altD1,1)

    # Build correction mask by combining: error mask + "nothing new" mask + a scenechange mask
    # ---------------------------------------------------------------------------------------------
    corrmask = mt_Lutxy(altD2,altDmm,"x 128 - abs 2 - y 2 + / "+ERTH1+" - "+ERTH2+" "+ERTH1+" - / 255 *",U=3,V=3).mt_Expand(U=3,V=3)
    sc       = corrmask.BilinearResize(64,64)
    sc       = mt_LutF(sc,sc,mode="average",expr="x 255 0.6 * > 255 0 ?").PointResize(ox,oy)
    corrmask = corrmask.mt_Logic(nothing_new,"max",U=2,V=2)
    corrmask = corrmask.mt_Logic(sc,"max",U=2,V=2)

    # Create a first bob from motion interpolation, not yet error corrected ...
    # -------------------------------------------------------------------------
    # ***( temporarily changed ... yet unsure what works best )***
    Interleave(bobbed,alt).AssumeParity(ORDR)
    SeparateFields().SelectEvery(8,0,3,5,6).Weave()
    naked    = last
    naked2   = last.vinverseD(1.6)   # flatbob #   (not referenced further down in this version)
    naked_mm = naked.mt_Edge("min/max",0,255,0,255,U=1,V=1)
    edibb_mm = nnedibobbed.mt_Edge("min/max",0,255,0,255,U=1,V=1).mt_Expand(mode="vertical")
    check2   = mt_LutXY(naked_mm,edibb_mm,"x y / 3 - 5 3 - / 255 *")
    corrmask = corrmask.mt_Logic(check2,"max",U=2,V=2)

    # ... and build a motion mask from this one.
    # ------------------------------------------
    # ***( temporarily changed ... tickertapes might suffer. )***
    stc = bobbed .removegrain(2)   # oweave.removegrain(11)
    mm  = stc.mt_Edge("min/max",0,255,0,255,U=3,V=3)
    # mm  = mm .mt_Logic(mm.DuplicateFrame(0),"max",U=3,V=3).mt_Logic(mm.DeleteFrame(0),"max",U=3,V=3)
    # max = stc.mt_expand(U=3,V=3)
    # max = max.mt_logic(max.Duplicateframe(0),"max",U=3,V=3).mt_logic(max.Duplicateframe(0).Duplicateframe(0),"max",U=3,V=3)
    # min = stc.mt_inpand(U=3,V=3)
    # min = min.mt_logic(min.Duplicateframe(0),"min",U=3,V=3).mt_logic(min.Duplicateframe(0).Duplicateframe(0),"min",U=3,V=3)
    # mm  = mt_LutXY(max,min,"x y -",U=3,V=3)
    diff2prev1  = mt_LutXY(stc,stc.DuplicateFrame(0),"x y - abs",U=3,V=3)
    diff2prev2  = mt_LutXY(stc,stc.DuplicateFrame(0).DuplicateFrame(0),"x y - abs",U=3,V=3)
    diff2prev12 = (mtnmode==0) ? diff2prev2
     \          : (mtnmode==1) ? diff2prev2 .mt_Merge(diff2prev1,Vedge,U=2,V=2)
     \          :                diff2prev1 .mt_Merge(diff2prev2,Hedge,U=2,V=2)
    motn        = diff2prev12.mt_Logic(diff2prev12.DeleteFrame(0),"max",U=3,V=3).mt_Logic(diff2prev12.DeleteFrame(0).DeleteFrame(0),"max",U=3,V=3)
    notstatic   = mt_LutXY(motn,mm,"x 1 - y 1 + / "+MNTH1+" - "+MNTH2+" "+MNTH1+" - / 255 *",U=3,V=3).mt_Expand(U=3,V=3).mt_Inpand(U=3,V=3)
    # notstatic = notstatic.mt_Logic(notstatic.RemoveGrain(4),"max",U=3,V=3).mt_Expand(U=3,V=3).mt_Inpand(U=3,V=3)

    # Now do the error correction of the "naked" MC-bob
    # -------------------------------------------------
    naked .mt_Merge(nnedibobbed,corrmask,luma=false,U=3,V=3) .VinverseD(2.7-sharpness)
    repaired = last

    # If requested, sharpen the corrected MC-bob up a little
    # ( pre-sharpen for EdiPost = 0 | 1 )
    # ------------------------------------------------------
    shrpbase = last   #.MinBlur(1,1).Merge(RemoveGrain(12,-1),0.23)
    shrp     = mt_LutXY(shrpbase,shrpbase.RemoveGrain(11,-1),"x x y - abs 16 / 1 1 x y - abs 1 4 / ^ + / ^ 16 * "+SSTR+" * x y - x y - abs 1.3 + / * 1 x y - abs 16 / 1 4 / ^ + / +",U=2,V=2)
    #  \      .Repair(repaired,1,0)
    shrpD    = mt_Makediff(shrpbase,shrp)
    (sharpness==0.0 || EdiPost==2) ? last : last .mt_Makediff(MergeLuma(shrpD.MinBlur(1,uv=1),shrpD.RemoveGrain(12,-1),0.24),U=2,V=2)

    # If requested, do additional PP via nnEDI
    # ----------------------------------------
    oweave.mt_merge(last,notstatic,luma=false,U=3,V=3)
    AssumeTFF()
    edisingle  = nnedi(dh=true,field=1).LanczosResize(ox,oy,0,-0.5,ox,2*oy+0.001,taps=3)
    edidouble  = merge(nnedi(field=1),nnedi(field=0),0.5)
    edidoubleD = mt_makediff(last,edidouble,U=3,V=3)
    (EdiPost==1) ? edisingle
     \ : (EdiPost==2) ? edidouble : last

    # ( post-sharpen for EdiPost = 2 )
    # ------------------------------------------------------
    edidoubleshrpD = mt_makediff(edidouble,sharpness==1.0?edidouble.removegrain(20):edidouble.removegrain(20).merge(edidouble,1.0-sharpness),U=3,V=3)
    edidoubleshrpD = edidoubleshrpD.repair(edidoubleD,13)
    (EdiPost==2) ? edidouble.mt_adddiff(edidoubleshrpD,U=3,V=3) : last


    # STT (Shape Transposition Technology) Routine:
    # =============================================
    # Simply weaving the corrected output with the original fields is bad, because the risk of
    # creating unwanted residual combing is too high.
    # Instead, the vertical "shape" is taken off the corrected output, and transposed
    # onto the fixed "poles" of the original fields' scanlines. Et Voila.
    # ----------------------------------------------------------------------------------------
    synthbob   = last.AssumeParity(ORDR).SeparateFields().SelectEvery(4,0,3).Weave().Bob(1,0)
    mapped_new = flatbob.mt_makediff(mt_makediff(synthbob,last,U=3,V=3),U=3,V=3)
    newfields  = mapped_new.AssumeParity(ORDR).SeparateFields().SelectEvery(4,1,2)
    mappedbob  = Interleave(ofields,newfields).SelectEvery(4,0,1,3,2).AssumeParity(ORDR).Weave()

    # Finally, for static areas use just original fields
    # --------------------------------------------------
    mappedbob   #bobbed
    oweave.mt_merge(last,notstatic.mt_inpand(Y=2,U=2,V=2),luma=false,U=3,V=3)

    # Lastly, set correct parity for the bobbed clip
    # ----------------------------------------------
    (order==0) ? AssumeTFF() : AssumeBFF()

    return(last)
}

# ===============================================

############################
#  Helper functions below  #
############################

## Function nnEDIbob, courtesy of tritical:
# slow, but accurate nnEDI-bob, always dumb ;)
# Calls nnedi with field=-2 on the input clip and returns the result.
Function nnEDIbob(clip Input)
{
    Input.nnedi(field=-2)
}


# Helper to simplify script:
# sets the parity flag of clp from a string, "TFF" => AssumeTFF(), anything else => AssumeBFF().
# If "order" is omitted, the clip's current parity (GetParity of frame 0) is kept, so the
# optional argument is never compared while undefined.
function AssumeParity(clip clp, string "order")
{
    order = default(order, (clp.GetParity() ? "TFF" : "BFF"))
    order == "TFF" ? clp.AssumeTFF() : clp.AssumeBFF()
    return(last)
}


# Kill Combing Function
#   clp   input clip
#   sstr  strength of the contra-sharpening applied after the vertical blur (default 2.7)
#   amnt  maximum per-pixel change allowed against the input; >254 = unlimited, 0 = return input (default 255)
#   uv    MaskTools chroma mode used for the U/V planes (default 3)
function VinverseD(clip clp, float "sstr", int "amnt", int "uv")
{
    uv   = default(uv,3)
    sstr = default(sstr,2.7)
    amnt = default(amnt,255)
    uv2  = (uv==2) ? 1 : uv      # intermediate clips skip chroma when the caller asked for a plain chroma copy
    STR  = string(sstr)
    AMN  = string(amnt)

    vblur  = clp.mt_convolution("1","50 99 50",U=uv,V=uv)
    vblurD = mt_makediff(clp,vblur,U=uv2,V=uv2)
    Vshrp  = mt_lutxy(vblur,vblur.mt_convolution("1","1 4 6 4 1",U=uv2,V=uv2),expr="x x y - "+STR+" * +",U=uv2,V=uv2)
    VshrpD = mt_makediff(Vshrp,vblur,U=uv2,V=uv2)
    VlimD  = mt_lutxy(VshrpD,VblurD,expr="x 128 - y 128 - * 0 < x 128 - abs y 128 - abs < x y ? 128 - 0.25 * 128 + x 128 - abs y 128 - abs < x y ? ?",U=uv2,V=uv2)

    mt_adddiff(Vblur,VlimD,U=uv,V=uv)

    # Optionally clamp the result to within +/- amnt of the input
    (amnt>254) ? last : (amnt==0) ? clp : mt_lutxy(clp,last,expr="x "+AMN+" + y < x "+AMN+" + x "+AMN+" - y > x "+AMN+" - y ? ?",U=uv,V=uv)

    return(last)
}


# Nifty Gauss/Median combination
#   clp  input clip
#   r    radius selector: 1, 2, or anything else (treated as 3)
#   uv   MaskTools chroma mode used for the U/V planes (default 3)
function MinBlur(clip clp, int r, int "uv")
{
    uv   = default(uv,3)
    uv2  = (uv==2) ? 1 : uv
    # Chroma arguments for RemoveGrain / MedianBlur: process chroma only when uv==3
    rg4  = (uv==3) ? 4  : -1
    rg11 = (uv==3) ? 11 : -1
    rg20 = (uv==3) ? 20 : -1
    medf = (uv==3) ? 1  : -200

    # Difference of the input against a Gaussian-like blur ...
    RG11D = (r==1) ? mt_makediff(clp,clp.removegrain(11,rg11),U=uv2,V=uv2)
     \    : (r==2) ? mt_makediff(clp,clp.removegrain(11,rg11).removegrain(20,rg20),U=uv2,V=uv2)
     \    :          mt_makediff(clp,clp.removegrain(11,rg11).removegrain(20,rg20).removegrain(20,rg20),U=uv2,V=uv2)

    # ... and against a median blur
    RG4D  = (r==1) ? mt_makediff(clp,clp.removegrain(4,rg4),U=uv2,V=uv2)
     \    : (r==2) ? mt_makediff(clp,clp.medianblur(2,2*medf,2*medf),U=uv2,V=uv2)
     \    :          mt_makediff(clp,clp.medianblur(3,3*medf,3*medf),U=uv2,V=uv2)

    # Per pixel: no change where the two differences disagree in sign, otherwise the smaller one
    DD    = mt_lutxy(RG11D,RG4D,"x 128 - y 128 - * 0 < 128 x 128 - abs y 128 - abs < x y ? ?",U=uv2,V=uv2)

    clp.mt_makediff(DD,U=uv,V=uv)
    return(last)
}
14th June 2009, 20:44 | #25 | Link |
Avisynth language lover
Join Date: Dec 2007
Location: Spain
Posts: 3,431
|
Yes.
An alternative is to do what thewebchat suggested at post #6, create a wrapper function called nnedi that just calls nnedi2. In the longer term, it might be useful to use the new function nnedi2_rpow2 where appropriate instead of nnedi2 directly. But that requires more thought and care. |
14th June 2009, 20:50 | #26 | Link |
Huh?
Join Date: Sep 2003
Location: Uruguay
Posts: 3,103
|
Thanks for a new release of such a great plugin, tritical
From examining the comparison pics, nnedi2 v2 looks a wee bit softer and with far fewer artifacts (if any) than nnedi v1.3. The white fences and house walls in the second lighthouse comparison are a really good indicator of the lack of artifacting, it's pretty amazing. I especially like that now nnediresize2x.avs is no longer needed and we can simply use nnedi2_rpow2(qual=3,rfactor=2,cshift="spline36resize") instead. By the way, how does the resizer choice affect the output of the cshift parameter? One thing, though: there were some cases (I think they were with animated content, not sure though) where EEDI2 would yield better results than NNEDI v1.3. Would it be possible for you to include EEDI2 in that comparison?
__________________
Read Decomb's readmes and tutorials, the IVTC tutorial and the capture guide in order to learn about combing and how to deal with it. |
15th June 2009, 01:23 | #27 | Link | ||
Registered User
Join Date: Dec 2003
Location: MO, US
Posts: 999
|
Quote:
xxxresize(fwidth,fheight,hshift,vshift,rfactor*owidth,rfactor*oheight) Where 'xxxresize' is the resizer you specified with 'cshift', and owidth/oheight are the original input image width/height. fwidth/fheight are set equal to rfactor*owidth and rfactor*oheight if you don't specify them. hshift/vshift are the shifts to cancel the center image shift introduced by nnedi2. So if you don't set fwidth/fheight there won't be much, if any, difference between resizers because the scaling factors are so close to 1. Also, I just realized that how I coded it won't work for bicubicresize because the syntax is bicubicresize(clip clip, int target_width, int target_height, float b, float c, float src_left, float src_top, float src_width, float src_height) It also has the limitation that you can't pass extra arguments to the resizer à la gauss/blackman/lanczos/bicubic. I'll fix these issues in the next version. Quote:
Last edited by tritical; 15th June 2009 at 02:00. |
||
15th June 2009, 03:36 | #29 | Link |
Registered User
Join Date: Nov 2007
Posts: 246
|
nnedi2_rpow2(rfactor=8,cshift="lanczosresize",fwidth=width*7,fheight=height*7)
Nice plug-in. I was wondering if there will be a difference between the above line and applying three times ( after or before each iteration of NNEDI2 ) lanczos resize with the resize factor of (7/8)^(1/3). Edit: Initial Size=Size0. If downsizing after enlarging. Enlarge by 2. Resize to Size1=int(Size0*2*(7/8)^(1/3)). Enlarge by 2. Resize to Size2=int(Size0*4*(7/8)^(2/3)) Enlarge by 2. Resize to Size0*7. Last edited by mikenadia; 15th June 2009 at 18:47. Reason: following Gavino's comment. |
15th June 2009, 13:21 | #31 | Link |
Registered User
Join Date: Apr 2005
Posts: 213
|
I have problems with the cpu optimizations.
Code:
LoadPlugin("plugins\NNEDI2\nnedi2.dll") ImageSource("clown__original.png", end=0, use_DevIL=False, pixel_type="RGB24") nnedi2_rpow2(rfactor=2) Code:
VirtualDub crash report -- build 31536 (release) -------------------------------------- Disassembly: 0227a380: 0000 add [eax], al 0227a382: 0f59af50010000 mulps xmm5, [edi+150] 0227a389: 0f59b710020000 mulps xmm6, [edi+210] 0227a390: 0f59bfd0020000 mulps xmm7, [edi+2d0] 0227a397: 0f58c4 addps xmm0, xmm4 0227a39a: 0f58cd addps xmm1, xmm5 0227a39d: 0f58d6 addps xmm2, xmm6 0227a3a0: 0f58df addps xmm3, xmm7 0227a3a3: 0f28a1a0000000 movaps xmm4, [ecx+a0] 0227a3aa: 0f28ec movaps xmm5, xmm4 0227a3ad: 0f28f4 movaps xmm6, xmm4 0227a3b0: 0f28fc movaps xmm7, xmm4 0227a3b3: 0f59a7a0000000 mulps xmm4, [edi+a0] 0227a3ba: 0f59af60010000 mulps xmm5, [edi+160] 0227a3c1: 0f59b720020000 mulps xmm6, [edi+220] 0227a3c8: 0f59bfe0020000 mulps xmm7, [edi+2e0] 0227a3cf: 0f58c4 addps xmm0, xmm4 0227a3d2: 0f58cd addps xmm1, xmm5 0227a3d5: 0f58d6 addps xmm2, xmm6 0227a3d8: 0f58df addps xmm3, xmm7 0227a3db: 0f28a1b0000000 movaps xmm4, [ecx+b0] 0227a3e2: 0f28ec movaps xmm5, xmm4 0227a3e5: 0f28f4 movaps xmm6, xmm4 0227a3e8: 0f28fc movaps xmm7, xmm4 0227a3eb: 0f59a7b0000000 mulps xmm4, [edi+b0] 0227a3f2: 0f59af70010000 mulps xmm5, [edi+170] 0227a3f9: 0f59b730020000 mulps xmm6, [edi+230] 0227a400: 0f59bff0020000 mulps xmm7, [edi+2f0] 0227a407: 0f58c4 addps xmm0, xmm4 0227a40a: 0f58cd addps xmm1, xmm5 0227a40d: 0f58d6 addps xmm2, xmm6 0227a410: 0f58df addps xmm3, xmm7 0227a413: f20f db 0fh <-- FAULT 0227a415: 7cc1 jl 0227a3d8 0227a417: f20f db 0fh 0227a419: 7cd3 jl 0227a3ee 0227a41b: f20f db 0fh 0227a41d: 7cc2 jl 0227a3e1 0227a41f: 0f2900 movaps [eax], xmm0 0227a422: 81c700030000 add edi, 00000300 0227a428: 83c010 add eax, 10h 0227a42b: 83ea04 sub edx, 04h 0227a42e: 0f857dfdffff jnz 0227a1b1 0227a434: 8b442410 mov eax, [esp+10h] 0227a438: 8b542414 mov edx, [esp+14h] 0227a43c: 33c9 xor ecx, ecx 0227a43e: 0f280488 movaps xmm0, [eax+ecx*4] 0227a442: 0f58048f addps xmm0, [edi+ecx*4] 0227a446: 0f290488 movaps [eax+ecx*4], xmm0 0227a44a: 83c104 add ecx, 04h 0227a44d: 83ea04 sub edx, 04h 0227a450: 75ec 
jnz 0227a43e 0227a452: 5f pop edi 0227a453: c3 ret 0227a454: cc int 3 0227a455: cc int 3 0227a456: cc int 3 0227a457: cc int 3 0227a458: cc int 3 0227a459: cc int 3 0227a45a: cc int 3 0227a45b: cc int 3 0227a45c: cc int 3 0227a45d: cc int 3 0227a45e: cc int 3 0227a45f: cc int 3 0227a460: 57 push edi 0227a461: 8b4c2408 mov ecx, [esp+08h] 0227a465: 8b7c240c mov edi, [esp+0ch] 0227a469: 8b442410 mov eax, [esp+10h] 0227a46d: 8b542414 mov edx, [esp+14h] 0227a471: 0f2801 movaps xmm0, [ecx] 0227a474: 0f28c8 movaps xmm1, xmm0 0227a477: 0f28d0 movaps xmm2, xmm0 0227a47a: 0f28d8 movaps xmm3, xmm0 0227a47d: 0f5907 mulps xmm0, [edi] Built on Aegis on Sat Mar 28 15:17:50 2009 using compiler version 1400 Windows 5.1 (Windows XP x86 build 2600) [Service Pack 3] EAX = 02389a50 EBX = 00000000 ECX = 014d2350 EDX = 00000004 EBP = 024dfef8 ESI = 02389a50 EDI = 014d3640 ESP = 024dfecc EIP = 0227a413 EFLAGS = 00010206 FPUCW = ffff027f FPUTW = ffffffff Crash reason: Illegal instruction Crash context: An SSE2 (Pentium 4/Athlon 64) instruction not supported by the CPU was executed in module 'nnedi2'. 
Pointer dumps: EAX 02389a50: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ECX 014d2350: bd582d8e bbbb6667 3b0b3536 3c91e728 3b0b3536 bd582d8e 3b0b3536 3c91e728 ESI 02389a50: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 EDI 014d3640: bf489640 3de50f20 bc95bba8 3ee5be34 bf1d7202 beb635a4 bed43c6b bc04f6ba ESP 024dfec8: 00244c20 014d3640 0227b768 014d2350 014d3640 02389a50 00000004 022e03dc 024dfee8: 014d2278 00000000 0227c9d3 022e0217 000000e0 0227c9ff 02389a50 7c802530 024dff08: 024dff74 7c80a0db 014d2350 02335664 022e03d0 02389a50 014d3640 00000005 024dff28: fffffe40 00000204 00000180 000000cc 42957556 00000000 00055294 000000c0 EBP 024dfef8: 000000e0 0227c9ff 02389a50 7c802530 024dff74 7c80a0db 014d2350 02335664 024dff18: 022e03d0 02389a50 014d3640 00000005 fffffe40 00000204 00000180 000000cc 024dff38: 42957556 00000000 00055294 000000c0 00000380 000001c0 014d9c60 000001c0 024dff58: 000001c0 0227cb52 014d0000 7c920222 014db4c0 00000000 7c920222 024dffac Thread call stack: 0227a413: nnedi2!0000a413 0227b768: nnedi2!0000b768 0227c9d3: nnedi2!0000c9d3 0227c9ff: nnedi2!0000c9ff 0227cb52: nnedi2!0000cb52 7c920222: ntdll!RtlAllocateHeap [7c910000+100c4+15e] 7c920222: ntdll!RtlAllocateHeap [7c910000+100c4+15e] 02271555: nnedi2!00001555 7c920222: ntdll!RtlAllocateHeap [7c910000+100c4+15e] 022715fa: nnedi2!000015fa 7c920222: ntdll!RtlAllocateHeap [7c910000+100c4+15e] 7c80b729: kernel32!GetModuleFileNameA [7c800000+b56f+1ba] 7c920222: ntdll!RtlAllocateHeap [7c910000+100c4+15e] -- End of report |
15th June 2009, 18:41 | #32 | Link | |||
Registered User
Join Date: Dec 2003
Location: MO, US
Posts: 999
|
Quote:
Quote:
Quote:
|
|||
15th June 2009, 20:20 | #33 | Link | |
Avisynth language lover
Join Date: Dec 2007
Location: Spain
Posts: 3,431
|
Quote:
|
|
15th June 2009, 22:01 | #34 | Link | |
Resize Abuser
Join Date: Apr 2005
Location: Seattle, WA
Posts: 623
|
Quote:
__________________
Mine: KenBurnsEffect/ZoomBox CutFrames Helped: DissolveAGG ColorBalance LQ Animation Fixer |
|
15th June 2009, 23:34 | #35 | Link |
Registered User
Join Date: Apr 2005
Posts: 213
|
Here another comparison.
Source: Spline36Resize: EEDI2: NNEDI: NNEDI2: NNEDI and NNEDI2 after strong sharpening and decent denoising (using identical parameters in both cases). NNEDI + SuperSlowSharpen + dfttest: NNEDI2 + SuperSlowSharpen + dfttest: Last edited by Archimedes; 16th June 2009 at 00:20. |
17th June 2009, 20:59 | #36 | Link | ||
Registered User
Join Date: Dec 2003
Location: MO, US
Posts: 999
|
Quote:
Quote:
New version should be ready soon. |
||
18th June 2009, 01:33 | #37 | Link |
Derek Prestegard IRL
Join Date: Nov 2003
Location: Los Angeles
Posts: 5,989
|
Nice work, as always, tritical!!! Very very impressive, a nice step up from NNEDI. As soon as things stabilize I will be using this on everything! ~MiSfit
__________________
These are all my personal statements, not those of my employer :) |
18th June 2009, 22:49 | #38 | Link |
Registered User
Join Date: Dec 2003
Location: MO, US
Posts: 999
|
I updated the zip archive to v1.2, changes:
Code:
+ remove mod2 height restriction when dh=true + more assembly optimizations/tweaks - fixed automatic cpu detection - fixed issues with nnedi2_rpow2 - always correct yv12 vertical chroma shift - use arg names when invoking resizers - allow users to pass optional resizer arguments (ep0/ep1 parameters) Last edited by tritical; 18th June 2009 at 22:57. |
19th June 2009, 11:49 | #40 | Link |
Registered User
Join Date: Apr 2005
Posts: 213
|
tritical, thank you for updating, but the problem with cpu detection still exists.
nnedi2_rpow2(rfactor=2) results in: Code:
VirtualDub crash report -- build 31536 (release) -------------------------------------- Disassembly: 01e7b640: 8d348e lea esi, [esi+ecx*4] 01e7b643: 660f6f2e movdqa xmm5, [esi] 01e7b647: 660f6f5e10 movdqa xmm3, [esi+10h] 01e7b64c: 660f db 0fh 01e7b64e: 3a0f cmp cl, [edi] 01e7b650: dd05660f6f14 fld qword ptr [146f0f66] 01e7b656: 4e dec esi 01e7b657: 660f6f644e10 movdqa xmm4, [esi+ecx*2+10h] 01e7b65d: 660f db 0fh 01e7b65f: 3a0f cmp cl, [edi] 01e7b661: e205 loop 01e7b668 01e7b663: e91d030000 jmp 01e7b985 01e7b668: eb06 jmp 01e7b670 01e7b66a: 8d9b00000000 lea ebx, [ebx+00] 01e7b670: 660f6f1e movdqa xmm3, [esi] 01e7b674: 660f6f4610 movdqa xmm0, [esi+10h] 01e7b679: 660f db 0fh 01e7b67b: 3a0f cmp cl, [edi] 01e7b67d: c3 ret 01e7b67e: 06 push es 01e7b67f: 660f6f144e movdqa xmm2, [esi+ecx*2] 01e7b684: 660f6f4c4e10 movdqa xmm1, [esi+ecx*2+10h] 01e7b68a: 660f db 0fh 01e7b68c: 3a0f cmp cl, [edi] 01e7b68e: ca068d retf 8d06 01e7b691: 348e xor al, 8eh 01e7b693: 660f6f2e movdqa xmm5, [esi] 01e7b697: 660f6f5e10 movdqa xmm3, [esi+10h] 01e7b69c: 660f db 0fh 01e7b69e: 3a0f cmp cl, [edi] 01e7b6a0: dd06 fld qword ptr [esi] 01e7b6a2: 660f6f144e movdqa xmm2, [esi+ecx*2] 01e7b6a7: 660f6f644e10 movdqa xmm4, [esi+ecx*2+10h] 01e7b6ad: 660f db 0fh 01e7b6af: 3a0f cmp cl, [edi] 01e7b6b1: e206 loop 01e7b6b9 01e7b6b3: e9cd020000 jmp 01e7b985 01e7b6b8: eb06 jmp 01e7b6c0 01e7b6ba: 8d9b00000000 lea ebx, [ebx+00] 01e7b6c0: 660f6f1e movdqa xmm3, [esi] 01e7b6c4: 660f6f4610 movdqa xmm0, [esi+10h] 01e7b6c9: 660f db 0fh <-- FAULT 01e7b6cb: 3a0f cmp cl, [edi] 01e7b6cd: c3 ret 01e7b6ce: 07 pop es 01e7b6cf: 660f6f144e movdqa xmm2, [esi+ecx*2] 01e7b6d4: 660f6f4c4e10 movdqa xmm1, [esi+ecx*2+10h] 01e7b6da: 660f db 0fh 01e7b6dc: 3a0f cmp cl, [edi] 01e7b6de: ca078d retf 8d07 01e7b6e1: 348e xor al, 8eh 01e7b6e3: 660f6f2e movdqa xmm5, [esi] 01e7b6e7: 660f6f5e10 movdqa xmm3, [esi+10h] 01e7b6ec: 660f db 0fh 01e7b6ee: 3a0f cmp cl, [edi] 01e7b6f0: dd07 fld qword ptr [edi] 01e7b6f2: 660f6f144e movdqa xmm2, [esi+ecx*2] 
01e7b6f7: 660f6f644e10 movdqa xmm4, [esi+ecx*2+10h] 01e7b6fd: 660f db 0fh 01e7b6ff: 3a0f cmp cl, [edi] 01e7b701: e207 loop 01e7b70a 01e7b703: e97d020000 jmp 01e7b985 01e7b708: eb06 jmp 01e7b710 01e7b70a: 8d9b00000000 lea ebx, [ebx+00] 01e7b710: 660f6f1e movdqa xmm3, [esi] 01e7b714: 660f6f4610 movdqa xmm0, [esi+10h] 01e7b719: 660f db 0fh 01e7b71b: 3a0f cmp cl, [edi] 01e7b71d: c3 ret 01e7b71e: 08660f or [esi+0fh], ah 01e7b721: 6f outsd 01e7b722: 144e adc al, 4eh 01e7b724: 660f6f4c4e10 movdqa xmm1, [esi+ecx*2+10h] 01e7b72a: 660f db 0fh 01e7b72c: 3a0f cmp cl, [edi] 01e7b72e: ca088d retf 8d08 01e7b731: 348e xor al, 8eh 01e7b733: 660f6f2e movdqa xmm5, [esi] 01e7b737: 660f6f5e10 movdqa xmm3, [esi+10h] 01e7b73c: 660f db 0fh 01e7b73e: 3a0f cmp cl, [edi] Built on Aegis on Sat Mar 28 15:17:50 2009 using compiler version 1400 Windows 5.1 (Windows XP x86 build 2600) [Service Pack 3] EAX = 020aff28 EBX = 0149afb8 ECX = 00000150 EDX = 01e7b6c0 EBP = 00000000 ESI = 01eb02f0 EDI = 020afee0 ESP = 020afee0 EIP = 01e7b6c9 EFLAGS = 00010206 FPUCW = ffff027f FPUTW = ffffffff Crash reason: Illegal instruction Crash context: An instruction not supported by the CPU was executed in module 'nnedi2'. 
Pointer dumps: EAX 020aff28: 7c91e920 00000280 00000138 00000005 0006ffd4 0000012c 01eb0590 00000540 EBX 0149afb8: 00000001 00000001 00000001 00000000 00000000 01eb0050 01ed56f0 01efad90 EDX 01e7b6c0: 1e6f0f66 466f0f66 3a0f6610 6607c30f 4e146f0f 4c6f0f66 0f66104e 07ca0f3a ESI 01eb02f0: e3e1e0c6 dad3d6d6 b4ccd5dc d5ccb4bc d6d3dadc e0e1e3d6 b8e0c5c6 bcc5b3a3 EDI 020afee0: 000002a0 01eb059c 01e7f8c4 01eb02f7 00000150 0149b090 020aff28 7c802530 ESP 020afee0: 000002a0 01eb059c 01e7f8c4 01eb02f7 00000150 0149b090 020aff28 7c802530 020aff00: 0149afb8 020aff74 7c80a0db 0149b090 fffffd60 00000150 0149b180 01492140 020aff20: 000001c4 00000005 7c91e920 00000280 00000138 00000005 0006ffd4 0000012c 020aff40: 01eb0590 00000540 000002a0 000002a0 000002a0 0149a160 01e7fa35 01f20564 Thread call stack: 01e7b6c9: nnedi2!0000b6c9 01e7f8c4: nnedi2!0000f8c4 01e7fa35: nnedi2!0000fa35 7c92005d: ntdll!RtlFreeHeap [7c910000+ff2d+130] 01e71752: nnedi2!00001752 7c92005d: ntdll!RtlFreeHeap [7c910000+ff2d+130] 01e717f7: nnedi2!000017f7 7c80b729: kernel32!GetModuleFileNameA [7c800000+b56f+1ba] 7c92005d: ntdll!RtlFreeHeap [7c910000+ff2d+130] -- End of report |
|
|