IS = { zkontrolovano 02 Jan 2015 },
  UPDATE  = { 2014-12-19 },
    title = {Audio-Video Speaker Diarization for Unsupervised Speaker
                  and Face Model Creation},
    author = {Campr, Pavel and Kune{\v s}ov{\'a}, Marie and Van{\v e}k, Jan and
                  {\v C}ech, Jan and Psutka, Josef},
   affiliation = {13133-NULL-NULL-13133-NULL},
   authorship = {20-20-20-20-20},
    booktitle = {Text, Speech and Dialogue},
    ISBN = {978-3-319-10815-5},
    venue = {Brno, Czech Republic},
    publisher = {Springer},
    address = {Cham, Switzerland},
    year = {2014},
    month = {September},
    day = {8--12},
    pages = {465--472},
   series =      {Lecture Notes in Computer Science},
  number =      {8655},
   book_pages = {623},
    Annote = {Our goal is to create speaker models in audio domain and
                  face models in video domain from a set of videos in
                  an unsupervised manner. Such models can be used
                  later for speaker identification in audio domain
                  (answering the question "Who was speaking and when")
                  and/or for face recognition ("Who was seen and
                  when") for given videos that contain speaking
                  persons. The proposed system is based on an
                  audio-video diarization system that tries to resolve
                  the disadvantages of the individual
                  modalities. Experiments on broadcasts of Czech
                  parliament meetings show that the proposed
                  combination of individual audio and video
                  diarization systems yields an improvement of the
                  diarization error rate (DER).},
    keywords = {audio-video speaker diarization, audio speaker
                  recognition, face recognition},
    project = {GACR P103/12/G084},
    url = {http://dx.doi.org/10.1007/978-3-319-10816-2_56},