% CMP research report CTU--CMP--2008--15 (Chum and Matas, 2008).
% NOTE(review): psurl below names Chum-TR-2001-27.pdf, which does not match
% this entry's key/report number (2008-15) -- confirm the correct file name.
@TechReport{Chum-TR-2008-15,
  IS          = { zkontrolovano 18 Feb 2010 },
  UPDATE      = { 2010-02-18 },
  author      = {Chum, Ond{\v r}ej and Matas, Ji{\v r}{\'\i}},
  title       = {Web Scale Image Clustering},
  institution = {Center for Machine Perception,
                 K333 FEE Czech Technical University},
  address     = {Prague, Czech Republic},
  year        = {2008},
  month       = may,
  type        = {Research Report},
  number      = {{CTU--CMP--2008--15}},
  issn        = {1213-2365},
  annote      = { We propose a randomized data mining method that finds
    clusters of spatially overlapping images. The core of the method
    relies on the min-Hash algorithm for fast detection of so-called
    cluster seeds. The seeds are then used as visual queries to obtain
    clusters which are formed as transitive closures of sets of
    partially overlapping images that include the seed. We show that the
    probability of finding a seed for an image cluster rapidly increases
    with the size of the cluster. The properties and performance of the
    algorithm are demonstrated on datasets with $10^4$ and $10^5$ images.
    The speed of the method depends on the size of the database and is
    close to linear for database sizes up to approximately
    $2^{34} \approx 10^{10}$ images. The proposed algorithm provides, as
    a side effect, a state-of-the-art near duplicate image detection.},
  keywords    = {Image clustering, min-hash algorithm},
  pages       = {14},
  figures     = {11},
  authorship  = {50-50},
  psurl       = {[Chum-TR-2001-27.pdf]},
  project     = {ICT-215078 DIPLECS},
}