Since no one posted such a thing yet, here are some speed comparisons.
CPU is a mobile Core 2 Duo T5470, 1.6 GHz, no hyper-threading.
Due to a lack of AVX2, F16C, and FMA, all the tests use zimg's SSE2 paths.
Input is 700×480 YUV420P8, h264, 1000 frames, decoded with ffms2.
Command used:
Code:
vspipe test.py /dev/null --end 999
with an additional "--requests 1" for the 1 thread tests.
zimg version is d2e712dc54fadf45a2c55169f5a49dd74e86d62e.
fmtconv version is r8.
swscale is from ffmpeg 2.4.3.
Note that swscale never processes more than one frame at a time, because
it doesn't like multithreading (great library design). In the 2 thread
tests, at most the fetching of its input frames may happen in parallel.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Upscaling by 2 using lanczos (700×480 -> 1400×960), 8 bit input:
1 thread:
fmtconv: 31.88 fps
zimg: 32.11 fps
swscale: 28.93 fps
2 threads:
fmtconv: 46.33 fps
zimg: 45.19 fps
swscale: 30.33 fps
Script used:
Code:
import vapoursynth as vs
# Create the core; the threads argument selects the 1 or 2 thread test.
c = vs.get_core(threads=2) # or threads=1
# Load the test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
def resize_zimg(clip):
    """Double the dimensions of *clip* with zimg's Lanczos filter.

    The clip is converted to 16 bits before resizing and brought back
    to 8 bits with ordered dithering afterwards.
    """
    deep = c.z.Depth(clip, depth=16)
    doubled = c.z.Resize(deep, width=2*deep.width, height=2*deep.height, filter="lanczos")
    return c.z.Depth(doubled, depth=8, dither="ordered")
def resize_fmtconv(clip):
    """Double the dimensions of *clip* with fmtconv's Lanczos kernel.

    The resampled clip is then reduced to 8 bits with dmode=0.
    """
    doubled = c.fmtc.resample(clip, w=2*clip.width, h=2*clip.height, kernel="lanczos")
    return c.fmtc.bitdepth(doubled, bits=8, dmode=0)
def resize_swscale(clip):
    """Double the dimensions of *clip* with the core's resize.Lanczos filter."""
    return c.resize.Lanczos(clip, width=2*clip.width, height=2*clip.height)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = resize_zimg(src)
#src = resize_swscale(src)
#src = resize_fmtconv(src)
# Mark the resulting clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Upscaling by 2 using lanczos (700×480 -> 1400×960), 16 bit input:
1 thread:
fmtconv: 40.66 fps
zimg: 36.54 fps
swscale: 22.89 fps
2 threads:
fmtconv: 55.60 fps
zimg: 50.99 fps
swscale: 24.66 fps
Script used:
Code:
import vapoursynth as vs
# Create the core with threads=2 (the 2 threads configuration).
c = vs.get_core(threads=2)
# Load the 8 bit test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Convert to 16 bits up front so every resizer receives 16 bit input.
src = c.fmtc.bitdepth(src, bits=16)
def resize_zimg(clip):
    """Double the dimensions of *clip* with zimg's Lanczos filter."""
    return c.z.Resize(clip, width=2*clip.width, height=2*clip.height, filter="lanczos")
def resize_fmtconv(clip):
    """Double the dimensions of *clip* with fmtconv's Lanczos kernel."""
    return c.fmtc.resample(clip, w=2*clip.width, h=2*clip.height, kernel="lanczos")
def resize_swscale(clip):
    """Double the dimensions of *clip* with the core's resize.Lanczos filter."""
    return c.resize.Lanczos(clip, width=2*clip.width, height=2*clip.height)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = resize_zimg(src)
#src = resize_swscale(src)
#src = resize_fmtconv(src)
# Mark the resulting clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Conversion from YUV420P8 to RGB24:
1 thread:
fmtconv: 60.58 fps
zimg: 54.88 fps
swscale: 59.05 fps
2 threads:
fmtconv: 73.32 fps
zimg: 60.79 fps
swscale: 64.14 fps
Script used:
Code:
import vapoursynth as vs
# Create the core with threads=2 (the 2 threads configuration).
c = vs.get_core(threads=2)
# Load the YUV420P8 test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
def test_zimg(clip):
    """Convert *clip* to 8 bit RGB using the zimg filters.

    Pipeline: Depth to depth=32 with sample=1, Resize at unchanged
    dimensions with a Lanczos chroma filter and no subsampling,
    Colorspace, then Depth back to 8 bits with ordered dithering.
    """
    # NOTE(review): sample=1 is presumably the float sample type -- confirm
    # against the zimg plugin documentation.
    deep32 = c.z.Depth(clip, sample=1, depth=32)
    full_chroma = c.z.Resize(deep32, width=deep32.width, height=deep32.height, filter_uv="lanczos", subsample_w=0, subsample_h=0)
    # NOTE(review): the positional 6, 6, 6, 0 are presumably the input
    # matrix/transfer/primaries and the output (RGB) matrix -- verify.
    converted = c.z.Colorspace(full_chroma, 6, 6, 6, 0)
    return c.z.Depth(converted, sample=0, depth=8, dither="ordered")
def test_swscale(clip):
    """Convert *clip* to RGB24 with the core's resize.Lanczos filter.

    No width or height is passed; only the output format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.RGB24)
def test_fmtconv(clip):
    """Convert *clip* to 8 bit RGB using the fmtconv filters.

    Pipeline: resample to css="444" with the Lanczos kernel, apply the
    "601" matrix with the RGB colour family as target, then reduce to
    8 bits with dmode=0.
    """
    full_chroma = c.fmtc.resample(clip, kernel="lanczos", css="444")
    as_rgb = c.fmtc.matrix(full_chroma, mat="601", col_fam=vs.RGB)
    return c.fmtc.bitdepth(as_rgb, bits=8, dmode=0)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = test_zimg(src)
#src = test_swscale(src)
#src = test_fmtconv(src)
# Mark the resulting clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Conversion from YUV420P10 to RGB24:
1 thread:
fmtconv: 56.96 fps
zimg: 53.05 fps
swscale: 56.43 fps
2 threads:
fmtconv: 70.60 fps
zimg: 59.14 fps
swscale: 60.84 fps
Script used:
Code:
import vapoursynth as vs
# Create the core with threads=2 (the 2 threads configuration).
c = vs.get_core(threads=2)
# Load the 8 bit test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Convert to 10 bits up front so the conversions start from 10 bit input.
src = c.fmtc.bitdepth(src, bits=10)
def test_zimg(clip):
    """Convert *clip* to 8 bit RGB using the zimg filters.

    Pipeline: Depth to depth=32 with sample=1, Resize at unchanged
    dimensions with a Lanczos chroma filter and no subsampling,
    Colorspace, then Depth back to 8 bits with ordered dithering.
    """
    # NOTE(review): sample=1 is presumably the float sample type -- confirm
    # against the zimg plugin documentation.
    deep32 = c.z.Depth(clip, sample=1, depth=32)
    full_chroma = c.z.Resize(deep32, width=deep32.width, height=deep32.height, filter_uv="lanczos", subsample_w=0, subsample_h=0)
    # NOTE(review): the positional 6, 6, 6, 0 are presumably the input
    # matrix/transfer/primaries and the output (RGB) matrix -- verify.
    converted = c.z.Colorspace(full_chroma, 6, 6, 6, 0)
    return c.z.Depth(converted, sample=0, depth=8, dither="ordered")
def test_swscale(clip):
    """Convert *clip* to RGB24 with the core's resize.Lanczos filter.

    No width or height is passed; only the output format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.RGB24)
def test_fmtconv(clip):
    """Convert *clip* to 8 bit RGB using the fmtconv filters.

    Pipeline: resample to css="444" with the Lanczos kernel, apply the
    "601" matrix with the RGB colour family as target, then reduce to
    8 bits with dmode=0.
    """
    full_chroma = c.fmtc.resample(clip, kernel="lanczos", css="444")
    as_rgb = c.fmtc.matrix(full_chroma, mat="601", col_fam=vs.RGB)
    return c.fmtc.bitdepth(as_rgb, bits=8, dmode=0)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = test_zimg(src)
#src = test_swscale(src)
#src = test_fmtconv(src)
# Mark the resulting clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Bit depth conversion from 16 to 8 bits:
1 thread:
No dithering:
fmtconv: 127.38 fps
zimg: 138.32 fps
Ordered dithering:
fmtconv: 126.02 fps
zimg: 139.20 fps
Floyd-Steinberg error diffusion:
fmtconv: 99.35 fps
zimg: 56.43 fps
2 threads:
No dithering:
fmtconv: 131.94 fps
zimg: 134.10 fps
Ordered dithering:
fmtconv: 123.25 fps
zimg: 128.98 fps
Floyd-Steinberg error diffusion:
fmtconv: 105.70 fps
zimg: 69.97 fps
I have no clue what sort of dithering swscale uses, if any.
The VapourSynth filter doesn't have any parameters for it.
Code:
1 thread:
swscale: 142.85 fps
2 threads:
swscale: 142.04 fps
For these tests I used 2000 frames instead of 1000.
Script used:
Code:
import vapoursynth as vs
# Create the core with threads=2 (the 2 threads configuration).
c = vs.get_core(threads=2)
# Load the 8 bit test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
# Convert to 16 bits up front so the filters under test start from 16 bits.
src = c.fmtc.bitdepth(src, bits=16)
def bits_zimg(clip):
    """Reduce *clip* to 8 bits with zimg's Depth filter."""
    # Swap dither for "ordered" or "error_diffusion" to run the other tests.
    return c.z.Depth(clip, depth=8, dither="none")
def bits_fmtconv(clip):
    """Reduce *clip* to 8 bits with fmtconv's bitdepth filter."""
    # dmode=1 is used for the no-dithering test; use 0 for ordered
    # dithering, or 6 for Floyd-Steinberg error diffusion.
    return c.fmtc.bitdepth(clip, bits=8, dmode=1)
def bits_swscale(clip):
    """Convert *clip* to YUV420P8 with the core's resize.Lanczos filter.

    No width or height is passed; only the output format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.YUV420P8)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = bits_zimg(src)
#src = bits_fmtconv(src)
#src = bits_swscale(src)
# Mark the resulting clip as the script's output.
src.set_output()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Code:
Bit depth conversion from 8 to 16 bits:
1 thread:
fmtconv: 159.20 fps
zimg: 145.33 fps
swscale: 150.64 fps
2 threads:
fmtconv: 148.23 fps
zimg: 155.85 fps
swscale: 161.81 fps
Script used:
Code:
import vapoursynth as vs
# Create the core with threads=2 (the 2 threads configuration).
c = vs.get_core(threads=2)
# Load the 8 bit test clip through ffms2.
src = c.ffms2.Source("700x480 YUV420P8 h264.mkv")
def bits_zimg(clip):
    """Raise *clip* to 16 bits with zimg's Depth filter."""
    return c.z.Depth(clip, depth=16)
def bits_fmtconv(clip):
    """Raise *clip* to 16 bits with fmtconv's bitdepth filter."""
    return c.fmtc.bitdepth(clip, bits=16)
def bits_swscale(clip):
    """Convert *clip* to YUV420P16 with the core's resize.Lanczos filter.

    No width or height is passed; only the output format is requested.
    """
    return c.resize.Lanczos(clip, format=vs.YUV420P16)
# Benchmark one implementation per run: keep exactly one of the three
# calls below active and leave the other two commented out.
src = bits_zimg(src)
#src = bits_fmtconv(src)
#src = bits_swscale(src)
# Mark the resulting clip as the script's output.
src.set_output()