View Single Post
Old 27th November 2014, 12:06   #7  |  Link
jackoneill
unsigned int
 
jackoneill's Avatar
 
Join Date: Oct 2012
Location: 🇪🇺
Posts: 760
Since no one posted such a thing yet, here are some speed comparisons.

CPU is a mobile Core 2 Duo T5470, 1.6 GHz, no hyper-threading.
Due to a lack of AVX2, F16C, and FMA, all the tests use zimg's SSE2 paths.

Input is 700×480 YUV420P8, h264, 1000 frames, decoded with ffms2.

Command used:
Code:
vspipe test.py /dev/null --end 999
with an additional "--requests 1" for the 1 thread tests.

zimg version is d2e712dc54fadf45a2c55169f5a49dd74e86d62e.
fmtconv version is r8.
swscale is from ffmpeg 2.4.3.

Note that swscale never processes more than one frame at a time, because
it doesn't like multithreading (great library design). In the 2 thread
tests, at most the fetching of its input frames may happen in parallel.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Code:
Upscaling by 2 using lanczos (700×480 -> 1400×960), 8 bit input:
    1 thread:
        fmtconv:    31.88 fps
        zimg:       32.11 fps
        swscale:    28.93 fps

    2 threads:
        fmtconv:    46.33 fps
        zimg:       45.19 fps
        swscale:    30.33 fps
Script used:

Code:
import vapoursynth as vs

# Core with two worker threads; switch to threads=1 for the 1 thread runs.
c = vs.get_core(threads=2) # or threads=1

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")


def resize_zimg(clip):
    """Upscale ``clip`` by 2 in both dimensions with the zimg plugin.

    The clip is raised to a depth of 16 before the lanczos resize and is
    brought back to a depth of 8 with ordered dithering afterwards.
    """
    deep = c.z.Depth(clip, depth=16)
    doubled = c.z.Resize(
        deep,
        width=2 * deep.width,
        height=2 * deep.height,
        filter="lanczos",
    )
    return c.z.Depth(doubled, depth=8, dither="ordered")

def resize_fmtconv(clip):
    """Upscale ``clip`` by 2 in both dimensions with fmtconv.

    Resamples with the lanczos kernel, then converts the result to 8 bits
    using ``dmode=0``.
    """
    doubled = c.fmtc.resample(
        clip,
        w=2 * clip.width,
        h=2 * clip.height,
        kernel="lanczos",
    )
    return c.fmtc.bitdepth(doubled, bits=8, dmode=0)

def resize_swscale(clip):
    """Upscale ``clip`` by 2 in both dimensions with ``resize.Lanczos``."""
    return c.resize.Lanczos(clip, width=2 * clip.width, height=2 * clip.height)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = resize_zimg(src)
#src = resize_swscale(src)
#src = resize_fmtconv(src)

# Expose the processed clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Code:
Upscaling by 2 using lanczos (700×480 -> 1400×960), 16 bit input:
    1 thread:
        fmtconv:    40.66 fps
        zimg:       36.54 fps
        swscale:    22.89 fps

    2 threads:
        fmtconv:    55.60 fps
        zimg:       50.99 fps
        swscale:    24.66 fps
Script used:

Code:
import vapoursynth as vs

# Core with two worker threads.
c = vs.get_core(threads=2)

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Raise the clip to 16 bits up front so each resizer receives 16 bit input.
src = c.fmtc.bitdepth(src, bits=16)


def resize_zimg(clip):
    """Upscale ``clip`` by 2 in both dimensions with zimg's lanczos filter."""
    return c.z.Resize(
        clip,
        width=2 * clip.width,
        height=2 * clip.height,
        filter="lanczos",
    )

def resize_fmtconv(clip):
    """Upscale ``clip`` by 2 in both dimensions with fmtconv's lanczos kernel."""
    return c.fmtc.resample(
        clip,
        w=2 * clip.width,
        h=2 * clip.height,
        kernel="lanczos",
    )

def resize_swscale(clip):
    """Upscale ``clip`` by 2 in both dimensions with ``resize.Lanczos``."""
    return c.resize.Lanczos(clip, width=2 * clip.width, height=2 * clip.height)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = resize_zimg(src)
#src = resize_swscale(src)
#src = resize_fmtconv(src)

# Expose the processed clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Code:
Conversion from YUV420P8 to RGB24:
    1 thread:
        fmtconv:    60.58 fps
        zimg:       54.88 fps
        swscale:    59.05 fps

    2 threads:
        fmtconv:    73.32 fps
        zimg:       60.79 fps
        swscale:    64.14 fps
Script used:

Code:
import vapoursynth as vs

# Core with two worker threads.
c = vs.get_core(threads=2)

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")


def test_zimg(clip):
    """Run the zimg chain used for the YUV-to-RGB24 benchmark.

    Steps, in order: ``Depth`` to depth 32 with ``sample=1``; ``Resize`` at
    unchanged dimensions with a lanczos chroma filter and both subsampling
    factors set to 0; ``Colorspace``; ``Depth`` back to depth 8 with
    ``sample=0`` and ordered dithering.
    """
    wide = c.z.Depth(clip, sample=1, depth=32)
    unsubsampled = c.z.Resize(
        wide,
        width=wide.width,
        height=wide.height,
        filter_uv="lanczos",
        subsample_w=0,
        subsample_h=0,
    )
    # NOTE(review): the positional 6, 6, 6, 0 presumably select the input
    # matrix/transfer/primaries and an RGB output matrix -- confirm against
    # the zimg plugin documentation.
    converted = c.z.Colorspace(unsubsampled, 6, 6, 6, 0)
    return c.z.Depth(converted, sample=0, depth=8, dither="ordered")

def test_swscale(clip):
    """Convert ``clip`` to ``vs.RGB24`` in a single ``resize.Lanczos`` call."""
    return c.resize.Lanczos(clip, format=vs.RGB24)

def test_fmtconv(clip):
    """Run the fmtconv chain used for the YUV-to-RGB24 benchmark.

    Steps, in order: ``resample`` to ``css="444"`` with the lanczos kernel,
    ``matrix`` with ``mat="601"`` into the RGB colour family, and
    ``bitdepth`` down to 8 bits with ``dmode=0``.
    """
    full_chroma = c.fmtc.resample(clip, kernel="lanczos", css="444")
    rgb = c.fmtc.matrix(full_chroma, mat="601", col_fam=vs.RGB)
    return c.fmtc.bitdepth(rgb, bits=8, dmode=0)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = test_zimg(src)
#src = test_swscale(src)
#src = test_fmtconv(src)

# Expose the processed clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Code:
Conversion from YUV420P10 to RGB24:
    1 thread:
        fmtconv:    56.96 fps
        zimg:       53.05 fps
        swscale:    56.43 fps

    2 threads:
        fmtconv:    70.60 fps
        zimg:       59.14 fps
        swscale:    60.84 fps
Script used:

Code:
import vapoursynth as vs

# Core with two worker threads.
c = vs.get_core(threads=2)

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Raise the clip to 10 bits so the conversions below start from 10 bit input.
src = c.fmtc.bitdepth(src, bits=10)


def test_zimg(clip):
    """Run the zimg chain used for the YUV-to-RGB24 benchmark.

    Steps, in order: ``Depth`` to depth 32 with ``sample=1``; ``Resize`` at
    unchanged dimensions with a lanczos chroma filter and both subsampling
    factors set to 0; ``Colorspace``; ``Depth`` back to depth 8 with
    ``sample=0`` and ordered dithering.
    """
    wide = c.z.Depth(clip, sample=1, depth=32)
    unsubsampled = c.z.Resize(
        wide,
        width=wide.width,
        height=wide.height,
        filter_uv="lanczos",
        subsample_w=0,
        subsample_h=0,
    )
    # NOTE(review): the positional 6, 6, 6, 0 presumably select the input
    # matrix/transfer/primaries and an RGB output matrix -- confirm against
    # the zimg plugin documentation.
    converted = c.z.Colorspace(unsubsampled, 6, 6, 6, 0)
    return c.z.Depth(converted, sample=0, depth=8, dither="ordered")

def test_swscale(clip):
    """Convert ``clip`` to ``vs.RGB24`` in a single ``resize.Lanczos`` call."""
    return c.resize.Lanczos(clip, format=vs.RGB24)

def test_fmtconv(clip):
    """Run the fmtconv chain used for the YUV-to-RGB24 benchmark.

    Steps, in order: ``resample`` to ``css="444"`` with the lanczos kernel,
    ``matrix`` with ``mat="601"`` into the RGB colour family, and
    ``bitdepth`` down to 8 bits with ``dmode=0``.
    """
    full_chroma = c.fmtc.resample(clip, kernel="lanczos", css="444")
    rgb = c.fmtc.matrix(full_chroma, mat="601", col_fam=vs.RGB)
    return c.fmtc.bitdepth(rgb, bits=8, dmode=0)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = test_zimg(src)
#src = test_swscale(src)
#src = test_fmtconv(src)

# Expose the processed clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Code:
Bit depth conversion from 16 to 8 bits:
    1 thread:
        No dithering:
            fmtconv:    127.38 fps
            zimg:       138.32 fps

        Ordered dithering:
            fmtconv:    126.02 fps
            zimg:       139.20 fps

        Floyd-Steinberg error diffusion:
            fmtconv:    99.35 fps
            zimg:       56.43 fps

    2 threads:
        No dithering:
            fmtconv:    131.94 fps
            zimg:       134.10 fps

        Ordered dithering:
            fmtconv:    123.25 fps
            zimg:       128.98 fps

        Floyd-Steinberg error diffusion:
            fmtconv:    105.70 fps
            zimg:        69.97 fps
I have no clue what sort of dithering swscale uses, if any.
The VapourSynth filter doesn't have any parameters for it.

Code:
    1 thread:
        swscale:    142.85 fps

    2 threads:
        swscale:    142.04 fps
For these tests I used 2000 frames instead of 1000.

Script used:

Code:
import vapoursynth as vs

# Core with two worker threads.
c = vs.get_core(threads=2)

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Raise the clip to 16 bits so the functions below convert from 16 to 8 bits.
src = c.fmtc.bitdepth(src, bits=16)


def bits_zimg(clip):
    """Reduce ``clip`` to a depth of 8 with zimg's ``Depth`` filter.

    The dither mode is edited by hand between runs: "none" as written here,
    or "ordered", or "error_diffusion".
    """
    return c.z.Depth(clip, depth=8, dither="none")

def bits_fmtconv(clip):
    """Reduce ``clip`` to 8 bits with fmtconv's ``bitdepth`` filter.

    The ``dmode`` value is edited by hand between runs: 1 as written here,
    or 0 for ordered, or 6 for Floyd-Steinberg error diffusion.
    """
    return c.fmtc.bitdepth(clip, bits=8, dmode=1)

def bits_swscale(clip):
    """Convert ``clip`` to ``vs.YUV420P8`` via ``resize.Lanczos``.

    No width or height is passed; only the target format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.YUV420P8)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = bits_zimg(src)
#src = bits_fmtconv(src)
#src = bits_swscale(src)

# Expose the processed clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Code:
Bit depth conversion from 8 to 16 bits:
    1 thread:
        fmtconv:    159.20 fps
        zimg:       145.33 fps
        swscale:    150.64 fps

    2 threads:
        fmtconv:    148.23 fps
        zimg:       155.85 fps
        swscale:    161.81 fps
Script used:

Code:
import vapoursynth as vs

# Core with two worker threads.
c = vs.get_core(threads=2)

# Source clip, opened through the ffms2 plugin.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")


def bits_zimg(clip):
    """Raise ``clip`` to a depth of 16 with zimg's ``Depth`` filter."""
    return c.z.Depth(clip, depth=16)

def bits_fmtconv(clip):
    """Raise ``clip`` to 16 bits with fmtconv's ``bitdepth`` filter."""
    return c.fmtc.bitdepth(clip, bits=16)

def bits_swscale(clip):
    """Convert ``clip`` to ``vs.YUV420P16`` via ``resize.Lanczos``.

    No width or height is passed; only the target format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.YUV420P16)


# Leave exactly one of the next three lines active to pick the library
# being measured.
src = bits_zimg(src)
#src = bits_fmtconv(src)
#src = bits_swscale(src)

# Expose the processed clip as the script's output.
src.set_output()
__________________
Buy me a "coffee" and/or hire me to write code!
jackoneill is offline   Reply With Quote