@Article{Franc-OCA-JMLR2009,
  IS = { zkontrolovano 06 Nov 2009 },
  UPDATE  = { 2009-11-06 },
  author =     {Vojt{\v e}ch, Franc and Soeren, Sonneburg},
  title =      {Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization},
  year =       {2009},
  month =      {October},
  pages =      {2157--2232},
  journal =    {Journal of Machine Learning Research},
  publisher =  {Microtome Publishing},
  address =    {31 Gibbs St, Brookline, United States},
  issn =       {1532-4435},
  volume =     {10},
  authorship = {50-50},
  annote = {We have developed an optimized cutting plane algorithm
    (OCA) for solving large-scale risk minimization problems. We prove
    that the number of iterations OCA requires to converge to a
    epsilon precise solution is approximately linear in the sample
    size. We also derive OCAS, an OCA-based linear binary Support
    Vector Machine (SVM) solver, and OCAM, a linear multi-class SVM
    solver.  In an extensive empirical evaluation we show that OCAS
    outperforms current state-of-the-art SVM solvers like svmlight,
    svmperf and BMRM, achieving speedup factor more than 1,200 over
    svmlight on some data sets and speedup factor of 29 over svmperf,
    while obtaining the same precise support vector solution.  OCAS,
    even in the early optimization steps, often shows faster
    convergence than the currently prevailing approximative methods in
    this domain, SGD and Pegasos.  In addition, our proposed linear
    multi-class SVM solver, OCAM, achieves speedups of factor of up to
    10 compared to SVMmulticlass. Finally, we use OCAS and OCAM in two
    real-world applications, the problem of human acceptor splice site
    detection and malware detection.  Effectively parallelizing OCAS,
    we achieve state-of-the-art results on an acceptor splice site
    recognition problem only by being able to learn from all the
    available 50 million examples in a 12-million-dimensional feature
    space. Source code, data sets and scripts to reproduce the
    experiments are available at
    http://cmp.felk.cvut.cz/~xfrancv/ocas/html/ },
  keywords =   {risk minimization, linear support vector machines,
                multi-class classification, binary classification, 
                large-scale learning, parallelization},
  project =    {1M0567},
  note = {on-line},
  psurl =      { [PDF, 1.5 MB]},
}