% Conference paper (CVWW 2015). Besides the standard BibTeX fields, this
% record carries extra fields (IS, UPDATE, c_title, book_pages, day, venue,
% prestige, project, psurl) that standard bibliography styles do not know
% and silently ignore; they are presumably consumed by a local publication
% database -- confirm before renaming or removing any of them.
@InProceedings{Bresler-Prusa-Hlavac-CVWW-2015,
  IS = { zkontrolovano 26 Jun 2015 },
  UPDATE  = { 2015-03-10 },
  author =      {Bresler, Martin and Pr{\accent23 u}{\v s}a, Daniel and
                  Hlav{\'a}{\v c}, V{\' a}clav},
  title =       {Using Agglomerative Clustering of Strokes to Perform
                  Symbols Over-segmentation within a Diagram
                  Recognition System},
  c_title =     {Pou{\v z}it{\' \i} aglomerativn{\' \i}ho shlukov{\'
                  a}n{\' \i} tah{\r u} pro dosa{\v z}en{\' \i} p{\v
                  r}esegmentov{\' a}n{\' \i} v r{\' a}mci syst{\' e}mu
                  pro rozpozn{\' a}v{\' a}n{\' \i} diagram{\r u}},
  year =        {2015},
  pages =       {67--74},
  booktitle =   {CVWW 2015: Proceedings of the 20th Computer Vision Winter Workshop},
  publisher =   {Graz University of Technology},
  address =     {Inffeldgasse 16/II, Graz, Austria},
  editor =      {Wohlhart, Paul and Lepetit, Vincent},
  book_pages =  {135},
  isbn =        {978-3-85125-388-7},
  month =       feb,
  day =         {9-11},
  venue =       {Seggau, Austria},
  annote =      {Symbol segmentation is a critical part of handwriting
                  recognition. Any mistake done in this step is
                  propagating further through the recognition
                  pipeline. It forces researchers to consider methods
                  generating multiple hypotheses for symbol
                  segmentation-over-segmentation. Simple approaches
                  which takes all reasonable combinations of strokes
                  are applied very often, because they allow to
                  achieve high recall rates very easily. However, they
                  generate too much hypotheses. It makes a recognizer
                  considerably slow. This paper presents our
                  experimentation with an alternative method based on
                  a single linkage agglomerative clustering of strokes
                  with trainable distance metric. We embed the method
                  into the state-of-the-art recognizer for on-line
                  sketched diagrams.  We show that it results in a
                  decrease in the number of generated hypotheses while
                  still reaching high recall rates. A problem emerges,
                  since the number of bad hypotheses is still
                  significantly higher than the number of symbols and
                  it leads to unbalanced training datasets. To deal
                  with it, we propose to train symbol classifiers with
                  synthesized artificial samples. We show that the
                  combination of these two improvements make the
                  recognizer significantly faster and very precise.},
  keywords =    {Clustering, Diagram recognition, Flowcharts, Finite automata, Artificial samples},
  prestige =    {international},
  project =     {SGS13/205/OHK3/3T/13, GACR P103/10/0783},
  psurl = { [Bresler-Prusa-Hlavac-CVWW-2015.pdf] },
}