Jul 15, 2012
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
    """
    Return matrix rank of array using SVD method

    Rank of the array is the number of SVD singular values of the
 array that aregreater than `tol`.
    Rank of the array is the number of SVD singular values of the array that are
    greater than `tol`.

    Parameters
    ----------
       threshold below which SVD values are considered zero. If `tol` is
       None, and ``S`` is an array with singular values for `M`, and
       ``eps`` is the epsilon value for datatype of ``S``, then `tol` is
       set to ``S.max() * eps``.
       set to ``S.max() *max(M.shape) *eps``.

    Notes
    -----
    Golub and van Loan [1]_ define "numerical rank deficiency" as using
    tol=eps*S[0] (where S[0] is the maximum singular value and thus the
    2-norm of the matrix). This is one definition of rank deficiency,
    and the one we use here.  When floating point roundoff is the main
    concern, then "numerical rank deficiency" is a reasonable choice. In
    some cases you may prefer other definitions. The most useful measure
    of the tolerance depends on the operations you intend to use on your
    matrix. For example, if your data come from uncertain measurements
    with uncertainties greater than floating point epsilon, choosing a
    tolerance near that uncertainty may be preferable.  The tolerance
    may be absolute if the uncertainties are absolute rather than
    relative.
    The default threshold to detect rank deficiency is a test on the magnitude
    of the singular values of `M`.  By default, we identify singular values less
    than ``S.max() * max(M.shape) * eps`` as indicating rank deficiency (with
    the symbols defined above). This is the algorithm MATLAB uses [1].  It also
    appears in *Numerical recipes* in the discussion of SVD solutions for linear
    least squares [2].

    This default threshold is designed to detect rank deficiency accounting for
    the numerical errors of the SVD computation.  Imagine that there is a column
    in `M` that is an exact (in floating point) linear combination of other
    columns in `M`. Computing the SVD on `M` will not produce a singular value
    exactly equal to 0 in general: any difference of the smallest SVD value from
    0 will be caused by numerical imprecision in the calculation of the SVD.
    Our threshold for small SVD values takes this numerical imprecision into
    account, and the default threshold will detect such numerical rank
    deficiency.  The threshold may declare a matrix `M` rank deficient even if
    the linear combination of some columns of `M` is not exactly equal to
    another column of `M` but only numerically very close to another column of
    `M`.

    We chose our default threshold because it is in wide use.  Other thresholds
    are possible.  For example, elsewhere in the 2007 edition of *Numerical
    recipes* there is an alternative threshold of ``S.max() *
    np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe
    this threshold as being based on "expected roundoff error" (p 71).

    The thresholds above deal with floating point roundoff error in the
    calculation of the SVD.  However, you may have more information about the
    sources of error in `M` that would make you consider other tolerance values
    to detect *effective* rank deficiency.  The most useful measure of the
    tolerance depends on the operations you intend to use on your matrix.  For
    example, if your data come from uncertain measurements with uncertainties
    greater than floating point epsilon, choosing a tolerance near that
    uncertainty may be preferable.  The tolerance may be absolute if the
    uncertainties are absolute rather than relative.

    References
    ----------
    .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*.
       Baltimore: Johns Hopkins University Press, 1996.
    .. [1] MATLAB reference documention, "Rank"
           http://www.mathworks.com/help/techdoc/ref/rank.html
    .. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery,
           "Numerical Recipes (3rd edition)", Cambridge University Press, 2007,
           page 795.

    Examples
    --------
    1
    >>> matrix_rank(np.zeros((4,)))
    0

    """
    M = asarray(M)
    if M.ndim > 2:
        return int(not all(M==0))
    S = svd(M, compute_uv=False)
    if tol is None:
        tol = S.max() * finfo(S.dtype).eps
        tol = S.max() *max(M.shape) *finfo(S.dtype).eps
    return sum(S > tol)


diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
 import sys

 import numpy as np
 from numpy.testing import *
 from numpy.testing import (TestCase, assert_, assert_equal, assert_raises,
                           assert_array_equal, assert_almost_equal,
                           run_module_suite)
 from numpy import array, single, double, csingle, cdouble, dot, identity
 from numpy import multiply, atleast_2d, inf, asarray, matrix
 from numpy import linalg
        yield assert_equal, matrix_rank(1), 1


 def test_reduced_rank():
    # Test matrices with reduced rank
    rng = np.random.RandomState(20120714)
    for i in range(100):
        # Make a rank deficient matrix
        X = rng.normal(size=(40, 10))
        X[:, 0] = X[:, 1] + X[:, 2]
        # Assert that matrix_rank detected deficiency
        assert_equal(matrix_rank(X), 9)
        X[:, 3] = X[:, 4] + X[:, 5]
        assert_equal(matrix_rank(X), 8)


 class TestQR(TestCase):
    def test_qr_empty(self):
        a = np.zeros((0,2))
Original file line number	Diff line number	Diff line change
Expand Up		@@ -1420,8 +1420,8 @@ def matrix_rank(M, tol=None):
		"""
		Return matrix rank of array using SVD method

		Rank of the array is the number of SVD singular values of the
		array that aregreater than `tol`.
		Rank of the array is the number of SVD singular values of the array that are
		greater than `tol`.

		Parameters
		----------
Expand All		@@ -1431,27 +1431,53 @@ def matrix_rank(M, tol=None):
		threshold below which SVD values are considered zero. If `tol` is
		None, and ``S`` is an array with singular values for `M`, and
		``eps`` is the epsilon value for datatype of ``S``, then `tol` is
		set to ``S.max() * eps``.
		set to ``S.max() max(M.shape) eps``.

		Notes
		-----
		Golub and van Loan [1]_ define "numerical rank deficiency" as using
		tol=eps*S[0] (where S[0] is the maximum singular value and thus the
		2-norm of the matrix). This is one definition of rank deficiency,
		and the one we use here. When floating point roundoff is the main
		concern, then "numerical rank deficiency" is a reasonable choice. In
		some cases you may prefer other definitions. The most useful measure
		of the tolerance depends on the operations you intend to use on your
		matrix. For example, if your data come from uncertain measurements
		with uncertainties greater than floating point epsilon, choosing a
		tolerance near that uncertainty may be preferable. The tolerance
		may be absolute if the uncertainties are absolute rather than
		relative.
		The default threshold to detect rank deficiency is a test on the magnitude
		of the singular values of `M`. By default, we identify singular values less
		than ``S.max() * max(M.shape) * eps`` as indicating rank deficiency (with
		the symbols defined above). This is the algorithm MATLAB uses [1]. It also
		appears in Numerical recipes in the discussion of SVD solutions for linear
		least squares [2].

		This default threshold is designed to detect rank deficiency accounting for
		the numerical errors of the SVD computation. Imagine that there is a column
		in `M` that is an exact (in floating point) linear combination of other
		columns in `M`. Computing the SVD on `M` will not produce a singular value
		exactly equal to 0 in general: any difference of the smallest SVD value from
		0 will be caused by numerical imprecision in the calculation of the SVD.
		Our threshold for small SVD values takes this numerical imprecision into
		account, and the default threshold will detect such numerical rank
		deficiency. The threshold may declare a matrix `M` rank deficient even if
		the linear combination of some columns of `M` is not exactly equal to
		another column of `M` but only numerically very close to another column of
		`M`.

		We chose our default threshold because it is in wide use. Other thresholds
		are possible. For example, elsewhere in the 2007 edition of *Numerical
		recipes* there is an alternative threshold of ``S.max() *
		np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe
		this threshold as being based on "expected roundoff error" (p 71).

		The thresholds above deal with floating point roundoff error in the
		calculation of the SVD. However, you may have more information about the
		sources of error in `M` that would make you consider other tolerance values
		to detect effective rank deficiency. The most useful measure of the
		tolerance depends on the operations you intend to use on your matrix. For
		example, if your data come from uncertain measurements with uncertainties
		greater than floating point epsilon, choosing a tolerance near that
		uncertainty may be preferable. The tolerance may be absolute if the
		uncertainties are absolute rather than relative.

		References
		----------
		.. [1] G. H. Golub and C. F. Van Loan, Matrix Computations.
		Baltimore: Johns Hopkins University Press, 1996.
		.. [1] MATLAB reference documention, "Rank"
		http://www.mathworks.com/help/techdoc/ref/rank.html
		.. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery,
		"Numerical Recipes (3rd edition)", Cambridge University Press, 2007,
		page 795.

		Examples
		--------
Expand All		@@ -1465,7 +1491,6 @@ def matrix_rank(M, tol=None):
		1
		>>> matrix_rank(np.zeros((4,)))
		0

		"""
		M = asarray(M)
		if M.ndim > 2:
Expand All		@@ -1474,7 +1499,7 @@ def matrix_rank(M, tol=None):
		return int(not all(M==0))
		S = svd(M, compute_uv=False)
		if tol is None:
		tol = S.max() * finfo(S.dtype).eps
		tol = S.max() max(M.shape) finfo(S.dtype).eps
		return sum(S > tol)


Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,7 +3,9 @@
		import sys

		import numpy as np
		from numpy.testing import *
		from numpy.testing import (TestCase, assert_, assert_equal, assert_raises,
		assert_array_equal, assert_almost_equal,
		run_module_suite)
		from numpy import array, single, double, csingle, cdouble, dot, identity
		from numpy import multiply, atleast_2d, inf, asarray, matrix
		from numpy import linalg
Expand DownExpand Up		@@ -451,6 +453,19 @@ def test_matrix_rank(self):
		yield assert_equal, matrix_rank(1), 1


		def test_reduced_rank():
		# Test matrices with reduced rank
		rng = np.random.RandomState(20120714)
		for i in range(100):
		# Make a rank deficient matrix
		X = rng.normal(size=(40, 10))
		X[:, 0] = X[:, 1] + X[:, 2]
		# Assert that matrix_rank detected deficiency
		assert_equal(matrix_rank(X), 9)
		X[:, 3] = X[:, 4] + X[:, 5]
		assert_equal(matrix_rank(X), 8)


		class TestQR(TestCase):
		def test_qr_empty(self):
		a = np.zeros((0,2))
Expand Down