@InProceedings{Sanchez-HUMANOIDS-2012,
  IS           = {checked 23 Jan 2014},
  UPDATE       = {2014-01-06},
  author       = {Sanchez-Riera, Jordi and Alameda-Pineda, Xavier and Wienke, Johannes and Deleforge, Antoine and Arias, Soraya and Cech, Jan and Wrede, Sebastian and Horaud, Radu P.},
  title        = {Online Multimodal Speaker Detection for Humanoid Robots},
  booktitle    = {IEEE-RAS International Conference on Humanoid Robots},
  month        = {November 29--December 1},
  year         = {2012},
  publisher    = {IEEE Robotics and Automation Society},
  address      = {Piscataway, USA},
  pages        = {126--133},
  issn         = {2164-0572},
  book_pages   = {882},
  venue        = {Osaka, Japan},
  doi          = {10.1109/HUMANOIDS.2012.6651509},
  url          = {http://perception.inrialpes.fr/Publications/2012/SAWDACWH12},
  ANNOTE       = {In this paper we address the problem of audio-visual speaker detection. We introduce an online system working on the humanoid robot NAO. The scene is perceived with two cameras and two microphones. A multimodal Gaussian mixture model (mGMM) fuses the information extracted from the auditory and visual sensors and detects the most probable audio-visual object, e.g., a person emitting a sound, in 3D space. The system is implemented on top of a platform-independent middleware and is able to process the information online (17 Hz). A detailed description of the system and its implementation is provided, with special emphasis on the online processing issues and the proposed solutions. Experimental validation, performed with five different scenarios, shows that the proposed method opens the door to robust human-robot interaction scenarios.},
  keywords     = {audio-visual speaker detection, sound source localization, stereo matching},
}