Key Tools For Scientific Computing In Python

Vlad Niculae

vlad@vene.ro http://vene.ro

University of Bucharest

University of Wolverhampton


Numpy

In [2]:
import numpy as np
X = np.eye(3)
X
Out [2]:
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])
In [3]:
np.sqrt(2 * X)
Out [3]:
array([[ 1.41421356,  0.        ,  0.        ],
       [ 0.        ,  1.41421356,  0.        ],
       [ 0.        ,  0.        ,  1.41421356]])
In [4]:
X.flags
Out [4]:
  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
In [5]:
Y = X[1:2, 1:2]
Y.flags
Out [5]:
  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False
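Since OWNDATA is False, Y is only a view into X's buffer: writing through the view mutates X. A quick sketch to confirm, reusing the X and Y above:

Y[0, 0] = 42.0         # write through the view...
print(X[1, 1])         # 42.0: ...and X sees the change
print(Y.base is X)     # True: Y borrows X's memory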
In [28]:
china_mem = np.memmap('china.mem', shape=(427, 640, 3))
# the shape must be given explicitly: the file is a 1:1 copy of the raw memory layout
In [29]:
imshow(china_mem)  # pylab mode: imshow, xticks, yticks are available unprefixed
xticks(())
yticks(())
Out [29]:
([], <a list of 0 Text yticklabel objects>)
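Here china.mem is assumed to already hold the raw pixel bytes. A minimal sketch of how such a file could be produced (the zeros array stands in for real image data):

china = np.zeros((427, 640, 3), dtype=np.uint8)  # stand-in for the actual pixels
m = np.memmap('china.mem', dtype=np.uint8, mode='w+', shape=china.shape)
m[:] = china   # copy the raw bytes into the mapped file
m.flush()      # make sure they reach disk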

Scipy

In [44]:
X = np.diag([1., 1, 2, 3, 5, 8])
print X  # Compared to what we work with, this is very dense!
print 'Fraction nonzero: %.2f' % (np.count_nonzero(X) * 1.0 / np.prod(X.shape))
[[ 1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.]
 [ 0.  0.  2.  0.  0.  0.]
 [ 0.  0.  0.  3.  0.  0.]
 [ 0.  0.  0.  0.  5.  0.]
 [ 0.  0.  0.  0.  0.  8.]]
Fraction nonzero: 0.17
In [31]:
import scipy.sparse as sp
X_sp = sp.csr_matrix(X)
In [52]:
X_sp.data
Out [52]:
array([ 1.,  1.,  2.,  3.,  5.,  8.])
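Alongside data, CSR keeps indices (the column of each stored value) and indptr (where each row starts). Reusing X_sp:

print(X_sp.indices)   # column index of each stored value: 0 through 5
print(X_sp.indptr)    # row i occupies data[indptr[i]:indptr[i+1]]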
In [77]:
from scipy.sparse.linalg import eigsh
eigsh(X_sp, k=3)  # Compute the 3 largest eigenpairs
Out [77]:
(array([ 3.,  5.,  8.]),
 array([[ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]]))
In [78]:
X_sp = sp.lil_matrix((5, 5))          # LIL allows assignment; build a nice
X_sp[:2, :2] = np.fliplr(np.eye(2))   # adjacency
X_sp[2:, 2:] = np.fliplr(np.eye(3))   # matrix
X_sp.toarray()
Out [78]:
array([[ 0.,  1.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  1.,  0.,  0.]])
In [88]:
from scipy.sparse import csgraph
csgraph.connected_components(X_sp)
Out [88]:
(3, array([0, 0, 1, 2, 1], dtype=int32))
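csgraph offers more graph algorithms on the same sparse adjacency representation, e.g. all-pairs shortest paths; a quick sketch on the X_sp above:

dist = csgraph.shortest_path(X_sp)
print(dist[0, 1])   # 1.0: nodes 0 and 1 are adjacent
print(dist[0, 2])   # inf: they lie in different components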

IPython

In [92]:
from IPython.parallel import Client
c = Client()
print "Number of available engines: %d " % len(c)
Number of available engines: 4 
In [101]:
def do_work():
    from time import sleep
    sleep(1)

print "Distributed:",
%timeit -r 1 -n 1 c[:].apply_sync(do_work)
print "Local: ",
%timeit -r 1 -n 1 [do_work() for _ in xrange(len(c))]
Distributed: 1 loops, best of 1: 1.01 s per loop
Local:  1 loops, best of 1: 4 s per loop

Behind the scenes, this connects to an IPython controller (ipcontroller), which routes the work to the engines

The nodes (engines) can be local processes, remote machines reached over SSH, or an MPI or batch-queue (e.g. PBS) cluster

More info: the IPython parallel documentation
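Besides apply_sync, a DirectView gives a parallel map. A minimal sketch, assuming a cluster is already up (e.g. started with ipcluster start -n 4):

dview = c[:]   # a DirectView spanning all engines
print(dview.map_sync(lambda x: x ** 2, range(8)))   # [0, 1, 4, 9, 16, 25, 36, 49]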

Joblib

A lightweight but invaluable package for:

- transparent disk caching (memoization) of function results
- simple, readable embarrassingly parallel for loops
- fast persistence of objects containing large numpy arrays

In [127]:
rank = lambda X: np.linalg.lstsq(X, np.ones(len(X)))[2]

from sklearn.externals.joblib import Memory
rank = Memory(cachedir='/tmp/joblib').cache(rank)

rank(np.diag([1, 0, 1, 0]))
________________________________________________________________________________
[Memory] Calling __main__--Users-vene-<ipython-input-127-625987046086>.<lambda>...
<lambda>(array([[1, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 0]]))
_________________________________________________________<lambda> - 0.0s, 0.0min
Out [127]:
2
In [128]:
rank(np.diag([1, 0, 1, 0]))  # The result is cached
Out [128]:
2
In [167]:
from sklearn.externals.joblib.parallel import Parallel, delayed

# Parallelize stemming with NLTK:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

def stem(word):
    return stemmer.stem(word)

words = """
Question : Is the Normal Curve ever a Skewed Distribution ?

Answer : Always ! The Lower Half of the Normal Curve is Negatively Skewed and the Upper Half is Positively Skewed !
""".split()
stems = Parallel(n_jobs=4, verbose=1)(delayed(stem)(w) for w in words)
print ' '.join(stems)
Question : Is the Normal Curv ever a Skew Distribut ? Answer : Alway ! The Lower Half of the Normal Curv is Neg Skew and the Upper Half is Posit Skew !
[Parallel(n_jobs=4)]: Done   1 out of  25 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=4)]: Done  33 out of  33 | elapsed:    0.0s finished
In [165]:
from sklearn.externals.joblib import dump, load
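dump and load persist objects containing large numpy arrays much more efficiently than plain pickle. A minimal round-trip sketch (the path is arbitrary):

big = np.random.randn(1000, 1000)
dump(big, '/tmp/big.joblib')                        # may write companion files next to this one
print(np.allclose(load('/tmp/big.joblib'), big))    # True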