% PhD thesis record: Michal Havlena, Czech Technical University, Prague, 2012.
% IS, UPDATE, supervisor, day, figures, authorship, psurl and project are not
% standard BibTeX fields; standard styles ignore them (presumably they are
% consumed by local publication-database tooling -- confirm before renaming).
% "zkontrolovano" in the IS field appears to be Czech for "checked".
% month uses the predefined macro (unquoted) so that each bibliography style
% can abbreviate or localise it.
@PhDThesis{Havlena-TR-2012-13,
  IS         = { zkontrolovano 26 Nov 2012 },
  UPDATE     = { 2012-10-01 },
  author     = {Havlena, Michal},
  supervisor = {Pajdla, Tom{\'a}{\v s}},
  title      = {Incremental Structure from Motion for Large Ordered and
                Unordered Sets of Images},
  school     = {Center for Machine Perception, K13133 FEE Czech Technical
                University},
  address    = {Prague, Czech Republic},
  year       = {2012},
  month      = jun,
  day        = {5},
  type       = {{PhD Thesis CTU--CMP--2012--13}},
  issn       = {1213-2365},
  pages      = {117},
  figures    = {52},
  authorship = {100},
  psurl      = {[Havlena-TR-2012-13.pdf]},
  project    = {FP6-IST-027787 DIRAC, GACR 201/07/1136, CTU0705913,
                FP7-SPA-218814 PRoVisG, FP7-ICT-247525 HUMAVIPS},
  annote     = {Structure from Motion (SfM) computation from large
    unordered image sets is dominated by image feature matching. This
    thesis proposes avoiding exhaustive pairwise image matching by
    sampling pairs of images and estimating visual overlap using the
    detected occurrences of visual words. Although this technique
    alone leads to a significant speedup of SfM computation, the
    efficiency of the reconstruction from redundant image sets,
    e.g. community image sets of cities with landmarks, can be further
    improved by using the proposed image set reduction technique which
    performs selection of a small subset from the set of input images
    by computing the approximate minimum connected dominating set of a
    graph expressing image similarity. The efficiency of SfM
    computation can be also disrupted by spending too much time in a
    few difficult matching problems instead of exploring other easier
    options first. We propose using a priority queue for interleaving
    different SfM tasks as this facilitates obtaining reasonable
    reconstructions in limited time. The priorities of the individual
    tasks are set according to the estimated visual overlap again but
    they are also influenced by the history of the computation. Image
    similarity estimated from co-occurring visual words proves its
    usability even for ordered image sets in the proposed sequence
    bridging technique. Geometrically verified loop candidates are
    added to the model as new constraints for bundle adjustment which
    closes the detected loops as it enforces global consistency of
    camera poses and 3D structure in the sequence. Several technical
    improvements are proposed also. First, triplets of images are used
    as the seeds of the reconstruction because 3D points verified in
    three views are more likely to be correct. Secondly, we
    demonstrate that the amount of translation w.r.t. the scene can be
    reliably measured for general as well as planar scenes by the
    dominant apical angle (DAA). By selecting only image pairs which
    have sufficient DAA, one is able to select keyframes from image
    sequences and high quality seeds when reconstructing from
    unordered image data. Finally, cone test is used instead of the
    widely used reprojection error to verify 2D-3D matches which
    allows for accepting a correct match even if the currently
    estimated 3D point location is incorrect. The proposed methods are
    validated by several experiments using both ordered and unordered
    image sets comprising thousands of images. City modeling is
    performed from both fish-eye lens images and equirectangular
    panoramas, successful pedestrian detection is demonstrated on
    images generated using the proposed non-central cylindrical
    projection once the images are stabilized w.r.t. ground plane
    using the estimated camera poses.},
  keywords   = {Structure from Motion, Omnidirectional Vision,
                City Modeling},
}