%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/


%% Created for Srikrishna Sridhar at 2012-08-31 14:28:24 -0500


%% Saved with string encoding Unicode (UTF-8)


% Venue-name string macros. The numbered macros (kdd00, scg09, soda07) are
% assembled with the # concatenation operator from the shared pieces below and
% are used as the Title/Booktitle of the corresponding proceedings entries.
@string{kdd = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining}}

@string{proc = {Proceedings of the}}

@string{kddshort = {KDD}}

@string{kdd00 = proc # { 6th } # kdd # { (} # kddshort # {'00)}}

@string{scg = {Annual ACM Symposium on Computational Geometry}}

@string{scgshort = {SCG}}

@string{scg09 = proc # { 25th } # scg # { (} # scgshort # {'09)}}

@string{soda = {Annual ACM-SIAM Symposium on Discrete Algorithms}}

@string{sodashort = {SODA}}

@string{soda07 = proc # { 18th } # soda # { (} # sodashort # {'07)}}


% Lin and Wei (1989), JSTOR export. Pages hold only the numeric range (the
% style supplies any "pp." prefix) and the abstract uses LaTeX math instead of
% raw Unicode Greek so it is safe under 8-bit BibTeX.
% NOTE(review): the same paper is also entered under the key lin1989robust;
% both keys are kept so that existing citations continue to resolve.
@article{Lin1989,
	Abstract = {We derive the asymptotic distribution of the maximum partial likelihood estimator $\hat\beta$ for the vector of regression coefficients $\beta$ under a possibly misspecified Cox proportional hazards model. As in the parametric setting, this estimator $\hat\beta$ converges to a well-defined constant vector $\beta^\ast$. In addition, the random vector $n^{1/2}(\hat\beta - \beta^\ast)$ is asymptotically normal with mean 0 and with a covariance matrix that can be consistently estimated. The newly proposed robust covariance matrix estimator is similar to the so-called ``sandwich'' variance estimators that have been extensively studied for parametric cases. For many misspecified Cox models, the asymptotic limit $\beta^\ast$ or part of it can be interpreted meaningfully. In those circumstances, valid statistical inferences about the corresponding covariate effects can be drawn based on the aforementioned asymptotic theory of $\hat\beta$ and the related results for the score statistics. Extensive studies demonstrate that the proposed robust tests and interval estimation procedures are appropriate for practical use. In particular, the robust score tests perform quite well even for small samples. In contrast, the conventional model-based inference procedures often lead to tests with supranominal size and confidence intervals with rather poor coverage probability.},
	Author = {Lin, D. Y. and Wei, L. J.},
	Copyright = {Copyright {\copyright} 1989 American Statistical Association},
	Date-Added = {2012-08-31 18:38:45 +0000},
	Date-Modified = {2012-08-31 18:38:52 +0000},
	Issn = {01621459},
	Journal = {Journal of the American Statistical Association},
	Jstor_Articletype = {research-article},
	Jstor_Formatteddate = {Dec., 1989},
	Language = {English},
	Number = {408},
	Pages = {1074--1078},
	Publisher = {American Statistical Association},
	Title = {The Robust Inference for the {Cox} Proportional Hazards Model},
	Url = {http://www.jstor.org/stable/2290085},
	Volume = {84},
	Year = {1989},
	Bdsk-Url-1 = {http://www.jstor.org/stable/2290085}}

% PostgreSQL 9.1.3 reference documentation (corporate author, double-braced).
@manual{postgres:9.1.3,
  author        = {{The PostgreSQL Global Development Group}},
  title         = {{PostgreSQL} 9.1.3 Documentation},
  year          = {2011},
  url           = {http://www.postgresql.org/docs/9.1},
  date-added    = {2012-07-26 21:00:33 +0000},
  date-modified = {2012-07-26 21:02:06 +0000},
  bdsk-url-1    = {http://www.postgresql.org/docs/9.1}
}

% Appendix on Cox regression by John Fox. Only the words that must keep their
% case are brace-protected, so the bibliography style can control title casing.
@manual{rcox,
	Author = {Fox, John},
	Title = {{Cox} Proportional Hazards Regression for Survival Data: Appendix to An {R} and {S-PLUS} Companion to Applied Regression},
	Year = {2002}}

% Stata manual entry for the stcox command. The corporate author is wrapped in
% an extra brace pair so it is not split into first name "Stata", last name "Press".
@manual{statacox,
	Author = {{Stata Press}},
	Title = {{stcox} - {Cox} proportional hazards model}
}

% Slide deck on the stratified Cox procedure (per the citation key). It is
% typed as misc because there is no journal, which the article type requires;
% the former volume field had no journal to belong to and was dropped.
@misc{stratifiedethzslides,
	Author = {Lisa Borsi and Marc Lickes and Lovro Soldo},
	Title = {The Stratified {Cox} Procedure},
	Year = {2011}
}

% R manual page for the Cox Zph proportional-hazards test; only a title is recorded.
@manual{coxzph,
	title = {Test the Proportional Hazards Assumption of a Cox Regression (R manual for Cox Zph)}
}

% Vitter (1984), sequential random sampling (Algorithm D). The abstract's
% exponentiation expression is restored as math; the export had flattened it
% to the plain letters "ab".
@article{V84a,
	Abstract = {{Several new methods are presented for selecting n records at
	random without replacement from a file containing N records. Each algorithm
	selects the records for the sample in a sequential manner---in the same
	order the records appear in the file. The algorithms are online in that the
	records for the sample are selected iteratively with no preprocessing. The
	algorithms require a constant amount of space and are short and easy to
	implement. The main result of this paper is the design and analysis of
	Algorithm D, which does the sampling in O(n) time, on the average; roughly n
	uniform random variates are generated, and approximately n exponentiation
	operations (of the form $a^b$, for real numbers $a$ and $b$) are performed during
	the sampling. This solves an open problem in the literature. CPU timings on
	a large mainframe computer indicate that Algorithm D is significantly faster
	than the sampling algorithms in use today.}},
	Author = {Vitter, Jeffrey Scott},
	Date-Added = {2012-03-10 23:03:35 -0800},
	Date-Modified = {2012-03-10 23:03:35 -0800},
	Doi = {10.1145/358105.893},
	Journal = {Communications of the ACM},
	Keywords = {Sampling},
	Number = {7},
	Pages = {703--718},
	Publisher = {ACM},
	Title = {Faster methods for random sampling},
	Volume = {27},
	Year = {1984}
}

% Vitter (1985), reservoir sampling (Algorithm Z). The author name is spelled
% exactly as in entry V84a so styles treat both papers as the same author.
@article{V85a,
	Abstract = {{We introduce fast algorithms for selecting a random sample of n records without replacement from a pool of N records, where the value of N is unknown beforehand. The main result of the paper is the design and analysis of Algorithm Z; it does the sampling in one pass using constant space and in O(n(1 + log(N/n))) expected time, which is optimum, up to a constant factor. Several optimizations are studied that collectively improve the speed of the naive version of the algorithm by an order of magnitude. We give an efficient Pascal-like implementation that incorporates these modifications and that is suitable for general use. Theoretical and empirical results indicate that Algorithm Z outperforms current methods by a significant margin.}},
	Author = {Vitter, Jeffrey Scott},
	Date-Added = {2012-03-09 17:05:52 -0800},
	Date-Modified = {2012-03-09 17:05:52 -0800},
	Doi = {10.1145/3147.3165},
	Journal = {ACM Transactions on Mathematical Software},
	Keywords = {Sampling},
	Number = {1},
	Pages = {37--57},
	Publisher = {ACM},
	Title = {Random sampling with a reservoir},
	Volume = {11},
	Year = {1985}
}

% Ogita, Rump and Oishi (2005) on accurate summation and dot products.
@article{ORO05a,
  author        = {Ogita, Takeshi and Rump, Siegfried M. and Oishi, Shin'ichi},
  title         = {Accurate Sum and Dot Product},
  journal       = {SIAM Journal on Scientific Computing},
  volume        = {26},
  number        = {6},
  pages         = {1955--1988},
  year          = {2005},
  month         = jun,
  publisher     = {Society for Industrial and Applied Mathematics},
  doi           = {10.1137/030601818},
  keywords      = {Numerical Analysis},
  date-added    = {2012-03-09 11:02:03 -0800},
  date-modified = {2012-03-09 17:06:04 -0800},
  bdsk-url-1    = {http://dx.doi.org/10.1137/030601818}
}

% McLeod and Bellhouse (1983), one-pass simple random sampling. A line-break
% hyphenation artifact in the abstract ("replace- ment") has been repaired.
@article{MB83a,
	Abstract = {{A convenient one-pass algorithm for drawing a simple random sample without replacement of size n from a population of N members, when N is initially unknown, is presented. Moreover, even when N is known, this algorithm appears to be more efficient than previously suggested algorithms when the entire population is stored in the fast memory of the computer. Applications to sampling from a computer file and to linear programming are briefly indicated.}},
	Author = {McLeod, A. I. and Bellhouse, D. R.},
	Date-Added = {2012-03-08 17:17:03 -0800},
	Date-Modified = {2012-03-10 23:34:30 -0800},
	Eprint = {2347297},
	Eprinttype = {jstor},
	Journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
	Keywords = {Sampling},
	Number = {2},
	Pages = {182--184},
	Publisher = {Blackwell Publishing for the Royal Statistical Society},
	Title = {A Convenient Algorithm for Drawing a Simple Random Sample},
	Volume = {32},
	Year = {1983}
}

% Chao (1982), unequal-probability sampling plan with fixed sample size.
@article{C82a,
  author        = {Chao, Min-Te},
  title         = {A general purpose unequal probability sampling plan},
  journal       = {Biometrika},
  volume        = {69},
  number        = {3},
  pages         = {653--656},
  year          = {1982},
  doi           = {10.1093/biomet/69.3.653},
  keywords      = {Sampling},
  abstract      = {{We present a general purpose unequal probability without replacement sampling plan with fixed sample size. In contrast to existing such plans, our scheme keeps the sample size fixed and lets the population units enter the sample one at a time through a carefully designed random mechanism. Consequently, all high-order inclusion probabilities can be easily computed.}},
  date-added    = {2012-03-08 17:13:26 -0800},
  date-modified = {2012-03-08 17:17:59 -0800},
  bdsk-url-1    = {http://dx.doi.org/10.1093/biomet/69.3.653}
}

% Manning, Raghavan and Schuetze (2008), information retrieval textbook.
@book{CS08a,
  author        = {Christopher D. Manning and Prabhakar Raghavan and Hinrich Sch{\"u}tze},
  title         = {Introduction to Information Retrieval},
  publisher     = {Cambridge University Press},
  year          = {2008},
  url           = {http://nlp.stanford.edu/IR-book/},
  date-added    = {2012-03-07 10:47:43 -0800},
  date-modified = {2012-03-07 10:48:21 -0800},
  bdsk-url-1    = {http://nlp.stanford.edu/IR-book/}
}

% Vattani (2009), exponential lower bound for k-means in the plane. Venue data
% is inherited from the :SCG09 proceedings entry via Crossref.
% NOTE(review): the abstract's math was flattened by the export (macros outside
% math mode, lost exponents); it has been reconstructed -- confirm against the
% published abstract.
@inproceedings{V09a,
	Abstract = {{The $k$-means algorithm is a well-known method for partitioning $n$ points that lie in the $d$-dimensional space into $k$ clusters. Its main features are simplicity and speed in practice. Theoretically, however, the best known upper bound on its running time (i.e. $O(n^{kd})$) is, in general, exponential in the number of points (when $kd=\Omega(n/\log n)$). Recently, Arthur and Vassilvitskii [2] showed a super-polynomial worst-case analysis, improving the best known lower bound from $\Omega(n)$ to $2^{\Omega(\sqrt{n})}$ with a construction in $d=\Omega(\sqrt{n})$ dimensions. In [2] they also conjectured the existence of super-polynomial lower bounds for any $d \geq 2$. Our contribution is twofold: we prove this conjecture and we improve the lower bound, by presenting a simple construction in the plane that leads to the exponential lower bound $2^{\Omega(n)}$.}},
	Author = {Vattani, Andrea},
	Crossref = {:SCG09},
	Date-Added = {2012-03-06 15:54:24 -0800},
	Date-Modified = {2012-03-08 17:17:37 -0800},
	Doi = {10.1145/1542362.1542419},
	Keywords = {Clustering},
	Pages = {324--332},
	Title = {$k$-means requires exponentially many iterations even in the plane},
	Bdsk-Url-1 = {http://dx.doi.org/10.1145/1542362.1542419}}

% Arthur, Manthey and Roeglin (2009), arXiv preprint on the smoothed complexity
% of k-means. Math symbols in the abstract are now in math mode, and the title
% writes k as math, matching the other k-means entries in this file.
@online{AMR09a,
	Abstract = {{The $k$-means method is one of the most widely used clustering algorithms, drawing its popularity from its speed in practice. Recently, however, it was shown to have exponential worst-case running time. In order to close the gap between practical performance and theoretical analysis, the $k$-means method has been studied in the model of smoothed analysis. But even the smoothed analyses so far are unsatisfactory as the bounds are still super-polynomial in the number $n$ of data points.
In this paper, we settle the smoothed running time of the $k$-means method. We show that the smoothed number of iterations is bounded by a polynomial in $n$ and $1/\sigma$, where $\sigma$ is the standard deviation of the Gaussian perturbations. This means that if an arbitrary input data set is randomly perturbed, then the $k$-means method will run in expected polynomial time on that input set.}},
	Author = {David Arthur and Bodo Manthey and Heiko R{\"o}glin},
	Date-Added = {2012-03-06 15:34:08 -0800},
	Date-Modified = {2012-03-10 23:48:17 -0800},
	Eprint = {0904.1113},
	Eprintclass = {cs.DS},
	Eprinttype = {arxiv},
	Title = {$k$-Means has Polynomial Smoothed Complexity},
	Year = {2009}}

% Lloyd (1982), least squares quantization. The acronym PCM is brace-protected
% so sentence-casing styles do not lower-case it, and the garbled final
% sentence of the abstract (typo plus flattened math) has been repaired.
@article{L82a,
	Abstract = {{It has long been realized that in pulse-code modulation (PCM), with a given ensemble of signals to handle, the quantum values should be spaced more closely in the voltage regions where the signal amplitude is more likely to fall. It has been shown by Panter and Dite that, in the limit as the number of quanta becomes infinite, the asymptotic fractional density of quanta per unit voltage should vary as the one-third power of the probability density per unit voltage of signal amplitudes. In this paper the corresponding result for any finite number of quanta is derived; that is, necessary conditions are found that the quanta and associated quantization intervals of an optimum finite quantization scheme must satisfy. The optimization criterion used is that the average quantization noise power be a minimum. It is shown that the result obtained here goes over into the Panter and Dite result as the number of quanta become large. The optimum quantization schemes for $2^{b}$ quanta, $b=1,2,\cdots,7$, are given numerically for Gaussian and for Laplacian distribution of signal amplitudes.}},
	Author = {Stuart Lloyd},
	Date-Added = {2012-03-06 15:00:11 -0800},
	Date-Modified = {2012-03-06 15:04:10 -0800},
	Doi = {10.1109/TIT.1982.1056489},
	Journal = {IEEE Transactions on Information Theory},
	Month = mar,
	Note = {Technical Report appeared much earlier in: \emph{Bell Telephone Laboratories Paper} (1957)},
	Number = {2},
	Pages = {129--137},
	Title = {Least squares quantization in {PCM}},
	Volume = {28},
	Year = {1982},
	Bdsk-Url-1 = {http://dx.doi.org/10.1109/TIT.1982.1056489}}

% Mahajan, Nimbhorkar and Varadarajan (2010), NP-hardness of planar k-means.
% Math in the abstract is now in math mode; a bare caret outside math would be
% a LaTeX error in any style that prints abstracts.
@article{MNV10a,
	Abstract = {{In the $k$-means problem, we are given a finite set $S$ of points in $R^m$, and integer $k \geq 1$, and we want to find $k$ points (centers) so as to minimize the sum of the square of the Euclidean distance of each point in $S$ to its nearest center. We show that this well-known problem is NP-hard even for instances in the plane, answering an open question posed by Dasgupta (2007) [7].}},
	Author = {Meena Mahajan and Prajakta Nimbhorkar and Kasturi Varadarajan},
	Date-Added = {2012-03-06 14:52:27 -0800},
	Date-Modified = {2012-03-08 17:17:44 -0800},
	Doi = {10.1016/j.tcs.2010.05.034},
	Journal = {Theoretical Computer Science},
	Keywords = {Clustering},
	Month = jun,
	Note = {In Press.},
	Title = {The planar $k$-means problem is {NP}-hard},
	Year = {2010},
	Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.tcs.2010.05.034}}

% Aloise, Deshpande, Hansen and Popat (2009), NP-hardness of Euclidean
% sum-of-squares clustering. The page range uses an en-dash (double hyphen).
@article{ADH09a,
	Abstract = {{A recent proof of NP-hardness of Euclidean sum-of-squares clustering, due to Drineas et al. (Mach. Learn. 56:9--33, 2004), is not valid. An alternate short proof is provided.}},
	Author = {Aloise, Daniel and Deshpande, Amit and Hansen, Pierre and Popat, Preyas},
	Date-Added = {2012-03-06 14:43:42 -0800},
	Date-Modified = {2012-03-06 14:45:02 -0800},
	Doi = {10.1007/s10994-009-5103-0},
	Journal = {Machine Learning},
	Pages = {245--248},
	Title = {{NP}-hardness of {Euclidean} sum-of-squares clustering},
	Volume = {75},
	Year = {2009},
	Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10994-009-5103-0}}

% Feller (1968), probability theory textbook, third edition.
@book{F68a,
  author        = {William Feller},
  title         = {An Introduction to Probability Theory and Its Applications},
  edition       = {3rd},
  publisher     = {Wiley},
  year          = {1968},
  date-added    = {2012-01-31 11:53:21 -0800},
  date-modified = {2012-01-31 11:53:42 -0800}
}

% McCallum, Nigam and Ungar (2000); venue fields are inherited from the
% :KDD00 proceedings entry through crossref.
@inproceedings{MNU00a,
  author        = {Andrew McCallum and Kamal Nigam and Lyle H. Ungar},
  title         = {Efficient clustering of high-dimensional data sets with application to reference matching},
  crossref      = {:KDD00},
  pages         = {169--178},
  year          = {2000},
  doi           = {10.1145/347090.347123},
  keywords      = {Clustering},
  date-added    = {2012-01-30 22:34:21 -0800},
  date-modified = {2012-03-10 23:31:44 -0800}
}

% Parent proceedings record for KDD 2000; both title fields expand the kdd00
% string macro so that child entries inherit a booktitle.
@proceedings{:KDD00,
  title         = kdd00,
  booktitle     = kdd00,
  year          = {2000},
  date-added    = {2012-01-30 22:33:08 -0800},
  date-modified = {2012-01-30 22:33:34 -0800}
}

% Arthur and Vassilvitskii (2007), k-means++ seeding. Venue data is inherited
% from the :SODA07 proceedings entry. The k in the title is written as math,
% consistent with the other k-means entries in this file.
@inproceedings{AV07a,
	Author = {David Arthur and Sergei Vassilvitskii},
	Crossref = {:SODA07},
	Date-Added = {2012-01-30 22:05:40 -0800},
	Date-Modified = {2012-03-11 03:00:57 -0700},
	Eprint = {1283494},
	Eprinttype = {acm},
	Pages = {1027--1035},
	Title = {$k$-means++: the advantages of careful seeding},
	Year = {2007}
}

% Parent proceedings record for SODA 2007; both title fields expand the soda07
% string macro so that child entries inherit a booktitle.
@proceedings{:SODA07,
  title         = soda07,
  booktitle     = soda07,
  year          = {2007},
  date-added    = {2012-01-30 22:11:03 -0800},
  date-modified = {2012-01-30 22:11:27 -0800}
}

% Parent proceedings record for SCG 2009; both title fields expand the scg09
% string macro so that child entries inherit a booktitle.
@proceedings{:SCG09,
  title         = scg09,
  booktitle     = scg09,
  year          = {2009},
  date-added    = {2012-03-06 15:59:03 -0800},
  date-modified = {2012-03-06 15:59:48 -0800}
}

% Srebro and Jaakkola (2003), DBLP export. The page range uses an en-dash
% (double hyphen).
@inproceedings{DBLP:conf/icml/SrebroJ03,
	Author = {Nathan Srebro and Tommi Jaakkola},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {ICML},
	Crossref = {DBLP:conf/icml/2003},
	Pages = {720--727},
	Title = {Weighted Low-Rank Approximations},
	Year = {2003}}

% ICML 2003 proceedings volume (DBLP export); crossref parent of SrebroJ03.
@proceedings{DBLP:conf/icml/2003,
  editor    = {Tom Fawcett and Nina Mishra},
  title     = {Machine Learning, Proceedings of the Twentieth International Conference (ICML 2003), August 21-24, 2003, Washington, DC, USA},
  publisher = {AAAI Press},
  year      = {2003},
  isbn      = {1-57735-189-4},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Bennett and Lanning (2007); venue fields are inherited from the :KDDCup07
% proceedings entry through crossref.
@inproceedings{:TheNetflixPrize07,
  author     = {James Bennett and Stan Lanning},
  title      = {The Netflix Prize},
  crossref   = {:KDDCup07},
  year       = {2007},
  eprinttype = {acm},
  date-added = {2012-06-12 15:27:25}
}

% Parent proceedings record for the 2007 KDD Cup and Workshop.
@proceedings{:KDDCup07,
  title      = {KDD Cup and Workshop},
  booktitle  = {KDD Cup and Workshop},
  year       = {2007},
  date-added = {2012-06-12 15:27:25}
}

% Feng, Kumar, Recht and Re (2012), DBLP export. The page range uses an
% en-dash, the acronym RDBMS is protected from sentence-casing, and the DOI
% embedded in the Ee link is also exposed in the standard Doi field.
@inproceedings{DBLP:conf/sigmod/FengKRR12,
	Author = {Xixuan Feng and Arun Kumar and Benjamin Recht and Christopher R{\'e}},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {SIGMOD Conference},
	Crossref = {DBLP:conf/sigmod/2012},
	Doi = {10.1145/2213836.2213874},
	Ee = {http://doi.acm.org/10.1145/2213836.2213874},
	Pages = {325--336},
	Title = {Towards a unified architecture for in-{RDBMS} analytics},
	Year = {2012}}

% SIGMOD 2012 proceedings volume (DBLP export); crossref parent of FengKRR12.
% The cedilla in the first editor's name is written as a brace-delimited
% special character so BibTeX sorts and abbreviates it as a single letter.
@proceedings{DBLP:conf/sigmod/2012,
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {SIGMOD Conference},
	Editor = {K. Sel{\c{c}}uk Candan and Yi Chen and Richard T. Snodgrass and Luis Gravano and Ariel Fuxman},
	Ee = {http://dl.acm.org/citation.cfm?id=2213836},
	Isbn = {978-1-4503-1247-9},
	Publisher = {ACM},
	Title = {Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2012, Scottsdale, AZ, USA, May 20-24, 2012},
	Year = {2012}}

% Recht, Fazel and Parrilo (2010), DBLP export. The page range uses an en-dash
% and the DOI embedded in the Ee link is also exposed in the Doi field.
@article{DBLP:journals/siamrev/RechtFP10,
	Author = {Benjamin Recht and Maryam Fazel and Pablo A. Parrilo},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Doi = {10.1137/070697835},
	Ee = {http://dx.doi.org/10.1137/070697835},
	Journal = {SIAM Review},
	Number = {3},
	Pages = {471--501},
	Title = {Guaranteed Minimum-Rank Solutions of Linear Matrix Equations via Nuclear Norm Minimization},
	Volume = {52},
	Year = {2010}}

% Candes and Recht (2012), DBLP export. The journal name is spelled out the
% same way as in entry V84a, the page range uses an en-dash, and the DOI from
% the Ee link is also exposed in the Doi field.
@article{DBLP:journals/cacm/CandesR12,
	Author = {Emmanuel J. Cand{\`e}s and Benjamin Recht},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Doi = {10.1145/2184319.2184343},
	Ee = {http://doi.acm.org/10.1145/2184319.2184343},
	Journal = {Communications of the ACM},
	Number = {6},
	Pages = {111--119},
	Title = {Exact matrix completion via convex optimization},
	Volume = {55},
	Year = {2012}}

% Gemulla, Nijkamp, Haas and Sismanis (2011), DBLP export. The page range uses
% an en-dash and the DOI from the Ee link is also exposed in the Doi field.
@inproceedings{DBLP:conf/kdd/GemullaNHS11,
	Author = {Rainer Gemulla and Erik Nijkamp and Peter J. Haas and Yannis Sismanis},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {KDD},
	Crossref = {DBLP:conf/kdd/2011},
	Doi = {10.1145/2020408.2020426},
	Ee = {http://doi.acm.org/10.1145/2020408.2020426},
	Pages = {69--77},
	Title = {Large-scale matrix factorization with distributed stochastic gradient descent},
	Year = {2011}}

% KDD 2011 proceedings volume (DBLP export); crossref parent of GemullaNHS11.
@proceedings{DBLP:conf/kdd/2011,
  editor    = {Chid Apt{\'e} and Joydeep Ghosh and Padhraic Smyth},
  title     = {Proceedings of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Diego, CA, USA, August 21-24, 2011},
  booktitle = {KDD},
  publisher = {ACM},
  year      = {2011},
  isbn      = {978-1-4503-0813-7},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Duchi, Agarwal and Wainwright (2010), DBLP export. The page range uses an
% en-dash (double hyphen).
@inproceedings{DBLP:conf/nips/DuchiAW10,
	Author = {John C. Duchi and Alekh Agarwal and Martin J. Wainwright},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {NIPS},
	Crossref = {DBLP:conf/nips/2010},
	Ee = {http://books.nips.cc/papers/files/nips23/NIPS2010_0423.pdf},
	Pages = {550--558},
	Title = {Distributed Dual Averaging In Networks},
	Year = {2010}}

% NIPS 2010 proceedings volume (DBLP export); crossref parent of DuchiAW10.
@proceedings{DBLP:conf/nips/2010,
  editor    = {John D. Lafferty and Christopher K. I. Williams and John Shawe-Taylor and Richard S. Zemel and Aron Culotta},
  title     = {Advances in Neural Information Processing Systems 23: 24th Annual Conference on Neural Information Processing Systems 2010. Proceedings of a meeting held 6-9 December 2010, Vancouver, British Columbia, Canada},
  booktitle = {NIPS},
  publisher = {Curran Associates, Inc.},
  year      = {2010},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Wright, Ganesh, Rao, Peng and Ma (2009), DBLP export. The page range uses an
% en-dash (double hyphen).
@inproceedings{DBLP:conf/nips/WrightGRPM09,
	Author = {John Wright and Arvind Ganesh and Shankar Rao and YiGang Peng and Yi Ma},
	Bibsource = {DBLP, http://dblp.uni-trier.de},
	Booktitle = {NIPS},
	Crossref = {DBLP:conf/nips/2009},
	Ee = {http://books.nips.cc/papers/files/nips22/NIPS2009_0116.pdf},
	Pages = {2080--2088},
	Title = {Robust Principal Component Analysis: Exact Recovery of Corrupted Low-Rank Matrices via Convex Optimization},
	Year = {2009}}

% NIPS 2009 proceedings volume (DBLP export); crossref parent of WrightGRPM09.
@proceedings{DBLP:conf/nips/2009,
  editor    = {Yoshua Bengio and Dale Schuurmans and John D. Lafferty and Christopher K. I. Williams and Aron Culotta},
  title     = {Advances in Neural Information Processing Systems 22: 23rd Annual Conference on Neural Information Processing Systems 2009. Proceedings of a meeting held 7-10 December 2009, Vancouver, British Columbia, Canada},
  booktitle = {NIPS},
  publisher = {Curran Associates, Inc.},
  year      = {2009},
  isbn      = {9781615679119},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Bertsekas (2011), SpringerLink export. The issue number is stored in the
% standard Number field; bibliography styles ignore the nonstandard Issue
% field the export used, so the issue was silently missing from the output.
@article{springerlink:10.1007/s10107-011-0472-0,
	Affiliation = {Department of Electrical Engineering and Computer Science, Laboratory for Information and Decision Systems, M.I.T., Mass, Cambridge, MA 02139, USA},
	Author = {Bertsekas, Dimitri},
	Date-Modified = {2012-08-02 18:08:19 +0000},
	Doi = {10.1007/s10107-011-0472-0},
	Issn = {0025-5610},
	Journal = {Mathematical Programming},
	Keyword = {Mathematics and Statistics},
	Number = {2},
	Pages = {163--195},
	Publisher = {Springer Berlin / Heidelberg},
	Title = {Incremental proximal methods for large scale convex optimization},
	Volume = {129},
	Year = {2011},
	Bdsk-Url-1 = {http://dx.doi.org/10.1007/s10107-011-0472-0}}

% Nocedal and Wright (2006), Google Books export. Author given names are
% spelled out (the export glued the initials together), and the book and
% series titles are restored to title case, which styles cannot recover.
@book{nocedal2006numerical,
	Author = {Nocedal, Jorge and Wright, Stephen J.},
	Isbn = {9780387303031},
	Lccn = {2006923897},
	Publisher = {Springer},
	Series = {Springer Series in Operations Research},
	Title = {Numerical Optimization},
	Url = {http://books.google.com/books?id=eNlPAAAAMAAJ},
	Year = {2006},
	Bdsk-Url-1 = {http://books.google.com/books?id=eNlPAAAAMAAJ}}

% Bottou and Bousquet (2007), DBLP export; crossref parent is the NIPS 2007
% proceedings entry.
@inproceedings{DBLP:conf/nips/BottouB07,
  author    = {L{\'e}on Bottou and Olivier Bousquet},
  title     = {The Tradeoffs of Large Scale Learning},
  booktitle = {NIPS},
  crossref  = {DBLP:conf/nips/2007},
  year      = {2007},
  ee        = {http://books.nips.cc/papers/files/nips20/NIPS2007_0726.pdf},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% NIPS 2007 proceedings volume (DBLP export); crossref parent of BottouB07.
% The volume's own year field is 2008, as exported.
@proceedings{DBLP:conf/nips/2007,
  editor    = {John C. Platt and Daphne Koller and Yoram Singer and Sam T. Roweis},
  title     = {Advances in Neural Information Processing Systems 20, Proceedings of the Twenty-First Annual Conference on Neural Information Processing Systems, Vancouver, British Columbia, Canada, December 3-6, 2007},
  booktitle = {NIPS},
  publisher = {Curran Associates, Inc.},
  year      = {2008},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Bertsekas (1999), Google Books export. The author's given name is spelled
% out (the export glued the initials together), and the book and series titles
% are restored to title case, which styles cannot recover.
@book{bertsekas1999nonlinear,
	Author = {Bertsekas, Dimitri P.},
	Isbn = {9781886529007},
	Lccn = {95081359},
	Publisher = {Athena Scientific},
	Series = {Athena Scientific Optimization and Computation Series},
	Title = {Nonlinear Programming},
	Url = {http://books.google.com/books?id=TgMpAQAAMAAJ},
	Year = {1999},
	Bdsk-Url-1 = {http://books.google.com/books?id=TgMpAQAAMAAJ}}

% Hager and Zhang (2006), CiteULike export. The title is no longer wrapped in
% a second brace pair (which disabled style-driven casing), the journal name
% is capitalised properly, and the bogus "Citeseer" publisher was removed.
@article{hager2006survey,
	Author = {Hager, W. W. and Zhang, H.},
	Citeulike-Article-Id = {7511569},
	Journal = {Pacific Journal of Optimization},
	Keywords = {bibliot-11-08-07},
	Number = {1},
	Pages = {35--58},
	Posted-At = {2010-07-19 10:35:37},
	Priority = {2},
	Title = {A Survey of Nonlinear Conjugate Gradient Methods},
	Volume = {2},
	Year = {2006}}

% Cox (1972), the original proportional hazards paper. Initials are separated
% by a space so BibTeX parses them as two given names, and the page range uses
% an en-dash (double hyphen).
@article{Cox1972,
	Author = {Cox, D. R.},
	Title = {Regression models and life-tables},
	Year = {1972},
	Journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
	Volume = {34},
	Number = {2},
	Pages = {187--220}}

% ICML 2001 proceedings volume (DBLP export).
@proceedings{DBLP:conf/icml/2001,
	Editor = {Carla E. Brodley and Andrea Pohoreckyj Danyluk},
	Title = {Proceedings of the Eighteenth International Conference on Machine Learning (ICML 2001), Williams College, Williamstown, MA, USA, June 28 - July 1, 2001},
	Publisher = {Morgan Kaufmann},
	Year = {2001},
	Isbn = {1-55860-778-1},
	Bibsource = {DBLP, http://dblp.uni-trier.de}}

% Lafferty, McCallum and Pereira (2001), conditional random fields (DBLP
% export). The page range uses an en-dash (double hyphen).
@inproceedings{DBLP:conf/icml/LaffertyMP01,
  author    = {John D. Lafferty and
               Andrew McCallum and
               Fernando C. N. Pereira},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting
               and Labeling Sequence Data},
  booktitle = {ICML},
  year      = {2001},
  pages     = {282--289},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Viterbi (2009), Scholarpedia article on the Viterbi algorithm (DBLP export).
@article{DBLP:journals/scholarpedia/Viterbi09,
	Author = {Andrew J. Viterbi},
	Title = {Viterbi algorithm},
	Journal = {Scholarpedia},
	Volume = {4},
	Number = {1},
	Pages = {6246},
	Year = {2009},
	Ee = {http://www.scholarpedia.org/article/Viterbi_algorithm},
	Bibsource = {DBLP, http://dblp.uni-trier.de}}

% Morales and Nocedal (2000), DBLP export. The page range uses an en-dash, the
% proper noun Newton is protected from sentence-casing, and the DOI embedded
% in the ee link is also exposed in the doi field.
@article{DBLP:journals/siamjo/MoralesN00,
  author    = {Jos{\'e} Luis Morales and
               Jorge Nocedal},
  title     = {Automatic Preconditioning by Limited Memory Quasi-{Newton}
               Updating},
  journal   = {SIAM Journal on Optimization},
  volume    = {10},
  number    = {4},
  year      = {2000},
  pages     = {1079--1096},
  doi       = {10.1137/S1052623497327854},
  ee        = {http://dx.doi.org/10.1137/S1052623497327854},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% DeRose (1988), DBLP export. The page range uses an en-dash (double hyphen).
@article{DBLP:journals/coling/DeRose88,
  author    = {Steven J. DeRose},
  title     = {Grammatical Category Disambiguation by Statistical Optimization},
  journal   = {Computational Linguistics},
  volume    = {14},
  number    = {1},
  year      = {1988},
  pages     = {31--39},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Sha and Pereira (2003), shallow parsing with CRFs (DBLP export).
@inproceedings{DBLP:conf/naacl/ShaP03,
	Author = {Fei Sha and Fernando C. N. Pereira},
	Title = {Shallow Parsing with Conditional Random Fields},
	Booktitle = {HLT-NAACL},
	Year = {2003},
	Ee = {http://acl.ldc.upenn.edu/N/N03/N03-1028.pdf},
	Bibsource = {DBLP, http://dblp.uni-trier.de}}

% Marcus, Santorini and Marcinkiewicz, the Penn Treebank paper (DBLP export).
% The page range uses an en-dash and the proper nouns in the title are
% protected from sentence-casing.
@article{DBLP:journals/coling/MarcusSM94,
  author    = {Mitchell P. Marcus and
               Beatrice Santorini and
               Mary Ann Marcinkiewicz},
  title     = {Building a Large Annotated Corpus of {English}: The {Penn Treebank}},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  year      = {1993},
  pages     = {313--330},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Sanderson (2010), NICTA technical report describing the Armadillo library.
@techreport{armadillo,
  author      = {Conrad Sanderson},
  title       = {Armadillo: An Open Source {C++} Linear Algebra Library for Fast Prototyping and Computationally Intensive Experiments},
  institution = {NICTA},
  year        = {2010}
}

% Eigen v3 library. Accented letters are written as brace-delimited special
% characters so BibTeX treats each as a single letter when sorting and
% abbreviating names.
@misc{eigen,
	Author = {Ga{\"e}l Guennebaud and Beno{\^\i}t Jacob and others},
	Title = {Eigen v3},
	Url = {http://eigen.tuxfamily.org},
	Year = {2010}
}

% Blei, Ng and Jordan (2003), ACM DL export. The proper noun Dirichlet is
% capitalised and brace-protected, and the journal name is stored in full
% rather than abbreviated.
@article{Blei:2003,
 author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
 title = {Latent {Dirichlet} Allocation},
 journal = {Journal of Machine Learning Research},
 issue_date = {3/1/2003},
 volume = {3},
 month = mar,
 year = {2003},
 issn = {1532-4435},
 pages = {993--1022},
 numpages = {30},
 url = {http://dl.acm.org/citation.cfm?id=944919.944937},
 acmid = {944937},
 publisher = {JMLR.org}
}

% Wang, Bai, Stanton, Chen and Chang (2009), ACM DL export. The acronym PLDA
% and the proper noun Dirichlet are brace-protected so sentence-casing styles
% do not lower-case them.
@inproceedings{Wang:2009,
 author = {Wang, Yi and Bai, Hongjie and Stanton, Matt and Chen, Wen-Yen and Chang, Edward Y.},
 title = {{PLDA}: Parallel Latent {Dirichlet} Allocation for Large-Scale Applications},
 booktitle = {Proceedings of the 5th International Conference on Algorithmic Aspects in Information and Management},
 series = {AAIM '09},
 year = {2009},
 isbn = {978-3-642-02157-2},
 location = {San Francisco, CA, USA},
 pages = {301--314},
 numpages = {14},
 url = {http://dx.doi.org/10.1007/978-3-642-02158-9_26},
 doi = {10.1007/978-3-642-02158-9_26},
 acmid = {1574062},
 publisher = {Springer-Verlag}
}

% Griffiths and Steyvers (2004). The journal is stored under its full name
% instead of the acronym PNAS, and the page range uses an en-dash.
@article{griffiths04finding,
  author={Thomas L. Griffiths and Mark Steyvers},
  title={Finding Scientific Topics},
  journal={Proceedings of the National Academy of Sciences},
  year={2004},
  volume={101},
  number={suppl. 1},
  pages={5228--5235}
}

% Zou and Hastie (2005), the elastic net paper. Fixes the typo "th" in the
% title, stores the issue in the standard number field (styles ignore a field
% named issue), and uses an en-dash in the page range.
@article{zou2005,
	author={Hui Zou and Trevor Hastie},
	title={Regularization and variable selection via the elastic net},
	journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	year={2005},
	volume={67},
	number={2},
	pages={301--320}
}

% Tibshirani (1996), the lasso paper. The journal is spelled out exactly as in
% entry Cox1972 instead of an ad hoc abbreviation, the issue number is added,
% and the page range uses an en-dash.
@article{tibshirani1996,
	author={Tibshirani, R.},
	year={1996},
	title={Regression shrinkage and selection via the lasso},
	journal={Journal of the Royal Statistical Society. Series B (Methodological)},
	volume={58},
	number={1},
	pages={267--288}
}

% Beck and Teboulle (2009). This record used to appear twice in a row under
% the same key, which BibTeX reports as a repeated entry; a single copy is
% kept. The journal name is in normal case, the issue number is added, and the
% page range uses an en-dash.
@article{beck2009,
author = {Beck, Amir and Teboulle, Mark},
year = {2009},
title = {A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse Problems},
journal = {SIAM Journal on Imaging Sciences},
volume = {2},
number = {1},
pages = {183--202}
}

% Cameron and Trivedi (2009). Given names are spelled out (the export glued
% the initials together) and the title is in title case with the product name
% Stata brace-protected.
@book{cameron2009,
  title={Microeconometrics Using {Stata}},
  author={Cameron, A. Colin and Trivedi, Pravin K.},
  isbn={9781597180481},
  lccn={2009277898},
  year={2009},
  publisher={Stata Press}
}

% Diekmann and Jann (2008). Typed as misc because no journal is recorded and
% the article type requires one (BibTeX warns about the empty field).
% NOTE(review): if this is in fact a journal article or lecture notes, add the
% venue and switch to the matching entry type.
@misc{diekmann2008,
author = {Andreas Diekmann and Ben Jann},
year = {2008},
title = {Regression Models for Categorical Dependent Variables}
}

% Paige and Saunders (1982), the LSQR paper. The acronym LSQR is
% brace-protected, and the journal is spelled exactly as in entry V85a
% (without the trailing "(TOMS)") so one journal has one spelling.
@article{paige1982lsqr,
  title={{LSQR}: An algorithm for sparse linear equations and sparse least squares},
  author={Paige, Christopher C. and Saunders, Michael A.},
  journal={ACM Transactions on Mathematical Software},
  volume={8},
  number={1},
  pages={43--71},
  year={1982},
  publisher={ACM}
}

% Hestenes and Stiefel (1952), the conjugate gradient paper. Entered as a
% journal article with its venue; the previous misc record carried only a
% publisher abbreviation and rendered without any venue.
% NOTE(review): volume, number and pages were supplied from the standard
% citation of this paper -- confirm against the source.
@article{hestenes1952methods,
  title={Methods of conjugate gradients for solving linear systems},
  author={Hestenes, Magnus Rudolph and Stiefel, Eduard},
  journal={Journal of Research of the National Bureau of Standards},
  volume={49},
  number={6},
  pages={409--436},
  year={1952},
  publisher={NBS}
}

% Golub and Kahan, singular values and pseudo-inverse. The journal name is
% written out in full, the issue number is added, and the page range uses an
% en-dash.
% NOTE(review): the year was changed from 1964 to 1965, the publication year
% of volume 2 in the standard citation -- confirm against the source. The
% citation key is left unchanged so existing citations still resolve.
@article{golub1964,
author = {Gene Golub and William Kahan},
year = {1965},
title = {Calculating the Singular Values and Pseudo-Inverse of a Matrix},
journal = {Journal of the Society for Industrial and Applied Mathematics, Series B: Numerical Analysis},
volume = {2},
number = {2},
pages = {205--224}
}

% Simon and Zha (2000). The journal is spelled exactly as in entry ORO05a
% instead of a truncated abbreviation, and the issue number is added.
% NOTE(review): the issue number comes from the standard citation -- confirm.
@article{simon2000,
    author = {Horst Simon and Hongyuan Zha},
    title = {Low Rank Matrix Approximation Using The {Lanczos} Bidiagonalization Process With Applications},
    journal = {SIAM Journal on Scientific Computing},
    year = {2000},
    volume = {21},
    number = {6},
    pages = {2257--2274}
}

% Grambsch and Therneau (1994), proportional hazards diagnostics.
@article{grambsch1994proportional,
	Author = {Grambsch, Patricia M and Therneau, Terry M},
	Title = {Proportional hazards tests and diagnostics based on weighted residuals},
	Journal = {Biometrika},
	Volume = {81},
	Number = {3},
	Pages = {515--526},
	Year = {1994},
	Publisher = {Biometrika Trust}}

% UCLA web resource by J. Bruin; the month uses the predefined month macro.
@online{newtest,
	Author = {Bruin, J.},
	Title = {newtest: command to compute new test {@ONLINE}},
	Year = {2011},
	Month = FEB,
	Url = {http://www.ats.ucla.edu/stat/stata/ado/analysis/}}

% UCLA supplemental notes on testing proportionality. The phrase
% "Applied Survival Analysis" was pasted twice in the title; one copy is kept.
@online{testph,
  author = {Bruin, J.},
  title = {Supplemental notes to Applied Survival Analysis},
  url={http://www.ats.ucla.edu/stat/examples/asa/test_proportionality.htm},
  year = {2011}
}

% Hosmer, Lemeshow and May, Google Scholar export. The "Jr." suffix is in the
% three-part "Last, Jr., First" form so it is not parsed as part of the
% surname, the publisher is "Wiley" rather than the scraped "Wiley. com", and
% the book title is in title case.
% NOTE(review): the volume value looks like export residue (no series is
% recorded) -- verify or remove.
@book{hosmer2011applied,
    title={Applied Survival Analysis: Regression Modeling of Time to Event Data},
    author={Hosmer, Jr., David W. and Lemeshow, Stanley and May, Susanne},
    volume={618},
    year={2011},
    publisher={Wiley}
}

% Tan, Steinbach and Kumar (2006), data mining textbook.
@book{kumar2006datamining,
	Author = {Tan, Pang-Ning and Steinbach, Michael and Kumar, Vipin},
	Title = {Introduction to Data Mining},
	Publisher = {Addison-Wesley},
	Year = {2006}}

% Mitchell (1997), machine learning textbook.
@book{mitchell1997machinelearning,
	Author = {Mitchell, Tom},
	Title = {Machine Learning},
	Publisher = {McGraw-Hill Science/Engineering/Math},
	Year = {1997}}

% James, Witten, Hastie and Tibshirani: introductory statistical learning text.
@book{james2013statisticallearning,
  author    = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
  title     = {An Introduction to Statistical Learning},
  publisher = {Springer},
  year      = {2013}
}

% Hastie, Tibshirani and Friedman: statistical learning reference text.
@book{hastie2008statisticallearning,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning},
  publisher = {Springer},
  year      = {2008}
}

% Lin and Wei (1989), robust inference for the Cox model, JASA 84(408).
% Initials are space-separated so styles abbreviate both of them, and the
% proper noun in the title is protected from sentence-casing.
% NOTE(review): the Lin1989 entry near the top of this file looks like the
% same paper under a different key - confirm and consolidate if so.
@article{lin1989robust,
  author    = {Lin, D. Y. and Wei, Lee-Jen},
  title     = {The robust inference for the {Cox} proportional hazards model},
  journal   = {Journal of the American Statistical Association},
  volume    = {84},
  number    = {408},
  pages     = {1074--1078},
  year      = {1989},
  publisher = {Taylor \& Francis}
}

% Chen, Feng, Re and Wang, ICDE 2012.
% The booktitle is written in natural word order (the exported form had the
% conference name inverted with a dangling "on"), and the accented surname
% uses a brace-wrapped accent so it sorts and abbreviates as one letter.
@inproceedings{chen2012optimizing,
  author       = {Chen, Fei and Feng, Xixuan and R{\'e}, Christopher and Wang, Min},
  title        = {Optimizing statistical information extraction programs over evolving text},
  booktitle    = {2012 {IEEE} 28th International Conference on Data Engineering ({ICDE})},
  pages        = {870--881},
  year         = {2012},
  organization = {IEEE}
}

% Haerdle, Mueller, Sperlich and Werwatz: online edition of their book on
% nonparametric and semiparametric models (page given in the url field).
% Umlauts are written as brace-wrapped accents, the form BibTeX treats as a
% single letter when sorting and building labels.
@online{glm_irls,
  author = {H{\"a}rdle, Wolfgang and M{\"u}ller, Marlene and Sperlich, Stefan and Werwatz, Axel},
  title  = {Nonparametric and Semiparametric Models},
  url    = {http://sfb649.wiwi.hu-berlin.de/fedc_homepage/xplore/ebooks/html/spm/spmhtmlnode27.html},
  year   = {2004}
}

% Breiman and Cutler: Random Forests home page (no date recorded).
@online{random_forest_home,
  title  = {Random Forests},
  author = {Leo Breiman and Adele Cutler},
  url    = {http://www.stat.berkeley.edu/~breiman/RandomForests/cc_home.htm}
}

% CRAN page of the randomForest R package.
% The camelCase package name and the personal names in the title are
% brace-protected so sentence-casing styles do not lowercase them.
@online{r_random_forest,
  author = {Leo Breiman and Adele Cutler and Andy Liaw and Matthew Wiener},
  title  = {{randomForest}: {Breiman} and {Cutler's} random forests for classification and regression},
  url    = {http://cran.r-project.org/web/packages/randomForest/index.html}
}

% Fox: regression analysis and generalized linear models textbook.
@book{fox2008applied,
  author    = {Fox, John},
  title     = {Applied regression analysis and generalized linear models},
  publisher = {Sage Publications},
  year      = {2008}
}

% Breiman, Friedman, Olshen and Stone: the CART book.
@book{breiman1984cart,
  author    = {Breiman, L. and Friedman, J. and Olshen, R. and Stone, C.},
  title     = {Classification and Regression Trees},
  year      = {1984},
  publisher = {Wadsworth International Group}
}

% Panda, Herbach, Basu and Bayardo: PLANET, tree-ensemble learning on
% MapReduce, published in the Proceedings of the VLDB Endowment.
% The volume field is spelled out in full (a misspelled field name is
% silently ignored by BibTeX), and acronyms in the title are protected.
@article{panda2009planet,
  author  = {Panda, Biswanath and Herbach, Joshua and Basu, Sugato and Bayardo, Roberto},
  title   = {{PLANET}: Massively Parallel Learning of Tree Ensembles with {MapReduce}},
  journal = {Proceedings of the VLDB Endowment},
  volume  = {2},
  number  = {2},
  pages   = {1426--1437},
  year    = {2009}
}

% Rahimi and Recht: random features for kernel machines (NIPS 2007).
% The venue lives in booktitle, the field this entry type requires and
% prints; a journal field is ignored here by the standard styles.
@inproceedings{RR07,
  author    = {Rahimi, Ali and Recht, Benjamin},
  title     = {Random Features for Large-Scale Kernel Machines},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2007}
}

% Kar and Karnick: random feature maps for dot product kernels (AISTATS 2012).
% The venue lives in booktitle, the field this entry type requires and
% prints; a journal field is ignored here by the standard styles.
@inproceedings{KK12,
  author    = {Kar, Purushottam and Karnick, Harish},
  title     = {Random Feature Maps for Dot Product Kernels},
  booktitle = {Proceedings of {AISTATS}},
  year      = {2012}
}

% Shalev-Shwartz, Singer and Srebro: the Pegasos SVM solver (ICML 2007).
% The venue lives in booktitle, the field this entry type requires and
% prints; the solver name and the acronym are protected from recasing.
@inproceedings{ShSS07,
  author    = {Shalev-Shwartz, Shai and Singer, Yoram and Srebro, Nathan},
  title     = {{Pegasos}: Primal Estimated Sub-Gradient Solver for {SVM}},
  booktitle = {Proceedings of the 24th International Conference on Machine Learning},
  year      = {2007}
}

% Schoelkopf, Platt, Shawe-Taylor, Smola and Williamson: one-class SVM paper,
% Neural Computation 13(7).
% The umlaut is a brace-wrapped accent so BibTeX treats it as one letter.
@article{Scholkopf,
  author  = {Sch{\"o}lkopf, Bernhard and Platt, John C. and Shawe-Taylor, John and Smola, Alex J. and Williamson, Robert C.},
  title   = {Estimating the Support of a High-Dimensional Distribution},
  journal = {Neural Computation},
  volume  = {13},
  number  = {7},
  pages   = {1443--1471},
  year    = {2001}
}

% Bellman (1958), the routing-problem paper, with volume and issue recorded
% so the journal reference is complete.
@article{bellman1958routing,
  author    = {Bellman, Richard},
  title     = {On a routing problem},
  journal   = {Quarterly of Applied Mathematics},
  volume    = {16},
  number    = {1},
  pages     = {87--90},
  year      = {1958},
  publisher = {JSTOR}
}

% Ford (1956), Network Flow Theory, a RAND Corporation paper.
% The author uses the "Last, Jr., First" form so the suffix is parsed as a
% suffix; the institution is the issuing body rather than the archive that
% later hosted the scan.
@techreport{ford1956network,
  author      = {Ford, Jr., Lester R.},
  title       = {Network flow theory},
  type        = {Paper},
  number      = {P-923},
  institution = {RAND Corporation},
  address     = {Santa Monica, California},
  year        = {1956}
}

% Brin and Page (1998), the web search engine paper presented at WWW7.
@inproceedings{pagerank,
  author    = {S. Brin and L. Page},
  title     = {The Anatomy of a Large-Scale Hypertextual Web Search Engine},
  booktitle = {Seventh International World-Wide Web Conference (WWW)},
  year      = {1998}
}

% Rendell: course web page on all-pairs shortest paths (access date in note).
@misc{apsp,
  author       = {Rendell, Alistair},
  title        = {All Pairs Shortest Paths},
  howpublished = {\url{http://users.cecs.anu.edu.au/~Alistair.Rendell/Teaching/apac_comp3600/module4/all_pairs_shortest_paths.xhtml}},
  note         = {Accessed: 2017-06-07}
}

% Fan, Raj and Patel (CIDR 2015): the case against specialized graph engines.
@inproceedings{grail,
  author    = {Jing Fan and Adalbert Gerald Soosai Raj and Jignesh M. Patel},
  title     = {The Case Against Specialized Graph Analytics Engines},
  booktitle = {{CIDR} 2015, Seventh Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 4-7, 2015, Online Proceedings},
  year      = {2015}
}

% Wikipedia article on artificial neural networks.
@misc{ann_wiki,
  author = {Wikipedia},
  title  = {Artificial neural network},
  url    = {http://en.wikipedia.org/wiki/Artificial_neural_network}
}

% Wikipedia article on the multilayer perceptron.
@misc{mlp_wiki,
  author = {Wikipedia},
  title  = {Multilayer perceptron},
  url    = {http://en.wikipedia.org/wiki/Multilayer_perceptron}
}

% Lecture notes on back-propagation for multilayer perceptrons (PDF linked in
% the url field).
% The author is a person, so the name is given in "Last, First" form instead
% of one double-braced unit; only the acronym in the title is protected.
@misc{mlp_gradient_wisc,
  author = {Hu, Yu Hen},
  title  = {{MLP(III)}: Back-Propagation},
  url    = {http://homepages.cae.wisc.edu/~ece539/videocourse/notes/pdf/lec%2011%20MLP%20(3)%20BP.pdf}
}

% Wikipedia article on breadth-first search.
% An author is supplied, matching the other Wikipedia entries in this file,
% so styles have something to sort and label the entry by.
@online{bfs_wikipedia,
  author = {Wikipedia},
  title  = {Breadth-first search},
  url    = {https://en.wikipedia.org/wiki/Breadth-first_search}
}

% Microsoft Research publication page on accelerating recurrent neural
% network training (linked in the url field).
% The author is a person, so the name is given in "Last, First" form instead
% of one double-braced unit, and the title is left for the style to case.
% NOTE(review): the publication may list further co-authors - confirm against
% the linked page.
@misc{mlp_parallel,
  author = {Huang, Zhiheng},
  title  = {Accelerating Recurrent Neural Network Training via Two Stage Classes and Parallelization},
  url    = {https://www.microsoft.com/en-us/research/publication/accelerating-recurrent-neural-network-training-via-two-stage-classes-and-parallelization/}
}

 % Kleinberg (1999), J. ACM 46(5); cited under the key hits.
 @article{hits,
   author     = {Kleinberg, Jon M.},
   title      = {Authoritative Sources in a Hyperlinked Environment},
   journal    = {J. ACM},
   year       = {1999},
   month      = sep,
   volume     = {46},
   number     = {5},
   pages      = {604--632},
   issue_date = {Sept. 1999},
   doi        = {10.1145/324133.324140},
   acmid      = {324140},
   publisher  = {ACM}
 }

% CS231n course notes (page and section anchor given in the url field).
@misc{momentum_cs231n,
  title = {{CS231n Convolutional Neural Networks for Visual Recognition}},
  url   = {http://cs231n.github.io/neural-networks-3/#sgd}
}

% Sutskever's PhD thesis on training recurrent neural networks (the url
% points at the thesis PDF), typed as a thesis with its school and year.
% The title is stored in title case so the style can recase it.
@phdthesis{momentum_ilya,
  author = {Sutskever, Ilya},
  title  = {Training Recurrent Neural Networks},
  school = {University of Toronto},
  year   = {2013},
  url    = {http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf}
}

% Medium blog post introducing the k nearest neighbors algorithm.
@misc{medium_knn,
  title = {{A quick introduction to k nearest neighbors algorithm}},
  url   = {https://medium.com/@adi.bronshtein/a-quick-introduction-to-k-nearest-neighbors-algorithm-62214cea29c7}
}

% pointclouds.org tutorial on searching with a KdTree.
@misc{point_knn,
  title = {{How to use a KdTree to search}},
  url   = {http://pointclouds.org/documentation/tutorials/kdtree_search.php}
}

% Lecture 6 slides from the course Neural Networks for Machine Learning
% (PDF linked in the url field); no author is recorded on this entry.
@misc{rmsprop_hinton,
  title = {{Neural Networks for Machine Learning}},
  url   = {http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf}
}

% Kingma and Ba: the Adam optimizer paper.
% It is a conference contribution, so the venue is stored in booktitle with
% the conference year; the arXiv identifier of the preprint is kept in the
% eprint fields. The method name in the title is protected from recasing.
@inproceedings{adam_kingma,
  author     = {Kingma, Diederik P. and Ba, Jimmy},
  title      = {{Adam}: A Method for Stochastic Optimization},
  booktitle  = {International Conference on Learning Representations},
  year       = {2015},
  eprint     = {1412.6980},
  eprinttype = {arXiv}
}