% Research report CTU--CMP--2007--08: ROC-based evaluation of stereo algorithms.
%
% Notes for maintainers:
%  - IS, UPDATE, figures, authorship, psurl and project are not standard BibTeX
%    fields; standard styles silently ignore them. They look like bookkeeping
%    for a local publication database -- NOTE(review): confirm which tools
%    read them before renaming or removing any.
%  - "zkontrolovano" in the IS field is Czech for "checked".
%  - month uses the predefined three-letter macro (bare, not braced) so that
%    each bibliography style/language can format it.
%  - annote holds the report abstract; its wording is kept as recorded and only
%    re-wrapped (BibTeX collapses whitespace runs in field values).
@TechReport{Kostliva-TR-2007-08,
  IS          = { zkontrolovano 13 Dec 2007 },
  UPDATE      = { 2007-07-24 },
  author      = {Kostliv{\'a}, Jana and {\v C}ech, Jan and {\v S}{\'a}ra, Radim},
  title       = {{ROC} Based Evaluation of Stereo Algorithms},
  institution = {Center for Machine Perception, K13133 FEE Czech Technical University},
  address     = {Prague, Czech Republic},
  year        = {2007},
  month       = mar,
  type        = {Research Report},
  number      = {CTU--CMP--2007--08},
  issn        = {1213-2365},
  pages       = {25},
  figures     = {8},
  authorship  = {34-33-33},
  psurl       = {[Kostliva-TR-2007-08.pdf]},
  project     = {1ET101210406, FP6-IST-027113 eTRIMS, MRTN-CT-2004-005439 VISIONTRAIN, Dur IG2003-2 062},
  annote      = {Which stereo algorithm is better? The one which is very dense
    but often erroneous or rather one which is very accurate but sparse? It
    depends on the application. In general, we can only say that the algorithm
    is better than the other if it is more accurate and denser. In literature,
    there exist several methods to evaluate quality of dense stereo matching
    algorithms. Their bottleneck is in tested algorithm parameter setting,
    which is assumed to be fixed. Such evaluation results are typically very
    different for various parameter setting in the sense they somehow change
    the tradeoff between accuracy and density. Therefore, we developed a new
    method for testing stereo algorithm based on the ROC-like analysis. We
    introduce ROC curves for stereo algorithms and define new numerical
    characteristics, which evaluate the algorithm itself, not a pair
    (algorithm, parameter setting) as it is in literature. Comparing ROC-curves
    of all tested algorithms we obtain the Feasibility Boundary, which is the
    ROC curve of all algorithms together, i.e. the best possible results which
    are feasible by a set of tested stereo algorithms. The important are the
    algorithms which forms the feasibility boundary, since they produce the
    best feasible results. On the other hand the algorithms which do not appear
    in the feasibility boundary are worse than the others both in the accuracy
    and density and are redundant in fact. We performed an experiment
    evaluating several known algorithms (representatives of different
    approaches) on several complex scenes with ground-truth disparity maps.
    Surprisingly, from this set, the most of the algorithms appear on the
    feasibility boundary, i.e. they are the best of all for certain requirement
    for density or accuracy. Based on this study, the algorithms with a strong
    prior models are suitable when higher density is desired which causes
    higher errors. Algorithms with a weak prior model but unambiguous, are
    suitable for application where there is a requirement for low error which
    inevitably causes sparser matching results. We are preparing a web-site for
    an automatic evaluation, so that other researchers can easily use this
    method. Such a collection of evaluation results is also useful for a
    potential user, who can simply select the most suitable algorithm and read
    the parameter settings. },
  keywords    = {computer vision, dense stereo, performance evaluation, ROC analysis},
}