Logo
* Most of the material is taken from this online course and this book.

Strings "123"

String Methods
s = 'abcabc'
s.upper()
s.isupper()
s.capitalize()
s.index('b')
s.index('z')
s.find('b')
s.find('z')
s.count('c')
s.replace('b','z')  # returns a new string; s itself stays 'abcabc'
s.replace('b','z',1)  # third argument limits the number of replacements
t = 'abc def ghi'
t.split(' ')
output = ''.join(i for i in list1)
"ABCABC"
False
"Abc"
1
error
1
-1
2
"azcazc"
"azcabc"

['abc', 'def', 'ghi']


String Slicing
s = 'abcd'
s[0]
s[-1]
s[1:3]
s[::-1]
a
d
bc
dcba

Since strings are immutable, there is no method to replace an arbitrary character of a string (e.g. replacing the second 'o' in 'foo'). But we can use the following function (taken from here):
    def arbit_replace(s, i, c):
        """Return a copy of s with the character at index i replaced by c.

        Negative indices count from the end, as in ordinary indexing.

        Raises:
            IndexError: if i is outside the valid index range of s.
        """
        length = len(s)
        if not -length <= i < length:
            raise IndexError('string index out of range')
        if i < 0:
            # Normalise so the s[i + 1:] slice below cannot wrap around to
            # the start of the string (e.g. i == -1 would give s[0:]).
            i += length
        return s[:i] + c + s[i + 1:]

    print arbit_replace('foo', 2, 'z')
foz

Lists [1, 2, 3]

list1 = ['a', 'b', 'c']
list1.remove('b')
del list1[-1]
list1.append('z')
list1.insert(1,63)
['a','c']
['a']
['a', 'z']
['a', 63, 'z']

Enumeration
for index, item in enumerate(my_list):  # enumerate yields (index, item) pairs
    do_something(index, item)

If we just want the indexes:
for i in range(len(documents)):  # not Pythonic
    do_something(i) 
for i, _ in enumerate(documents):  # Pythonic
    do_something(i) 

List Comprehension:
list1 = [i for i in range(100) if i not in range(30, 50)]
list2 = [(x, y)
         for x in range(10)
         for y in range(10)]
list3 = [(x, y) 
         for x in range(10)
         for y in range(x + 1, 10)]
list4 = [0 for _ in range(5)]


Concatenating lists:
x = [1, 2]
x.extend([3, 4])  # x is [1, 2, 3, 4]

If you don't want to modify x use list addition:
x = [1, 2]
y = x + [3, 4] # y is [1, 2, 3, 4]; x is unchanged

Appending to lists one item at a time:
x = [1, 2]
x.append(0) # x is now [1, 2, 0]

Tuples (1, 2, 3)

Tuples are lists' immutable cousins. Pretty much anything you can do to a list that doesn't involve modifying it, you can do to a tuple.
tuple1 = (3)
print tuple1
type(tuple1)
tuple2 = (3,)  # the trailing comma is what makes this a one-element tuple
print tuple2
type(tuple2)
3
<type 'int'>
(3,) <type 'tuple'>

Dictionaries {1:'a', 2:'b', 3:'c'}

dic={'a':'cat', 'b':'bear', 'c':'cow'}
dic.keys()
dic.values()
dic['b']
'a' in dic
'cat' in dic
'cat' in dic.values()
del dic['b']
dic['d'] = 'dog'
dic['z']
dic.get('z')
dic.get('z', 'nothing')

['a', 'b', 'c']
['cat', 'bear', 'cow']
'bear'
True
False
True
{'a':'cat', 'c':'cow'}
{'a':'cat', 'c':'cow', 'd':'dog'}
KeyError
None
nothing

for key, value in aDict.iteritems():
    print "letter: {} Animal: {}".format(key,value)

a = ''.join('{}{}'.format(key, str(val)) for key, val in dict1.items())

iteritems() is to items() what xrange is to range: it returns an iterator instead of building the whole list in memory.

defaultdict
A defaultdict is like a regular dictionary, except that when you try to look up a key it doesn't contain, it first adds a value for it using a zero-argument function you provided when you created it.
from collections import defaultdict

a = defaultdict(int)
b = defaultdict(str)
c = defaultdict(list)
d = defaultdict(tuple)
e = defaultdict(set)
f = defaultdict(dict)

g = defaultdict(lambda: [0, 0])
g[2][1] = 1

print(a[1])
print(b[1])
print(c[1])
print(d[1])
print(e[1])
print(f[1])
print(g[1])
print(g.items())
0

[]
()
set([])
{}
[0, 0]
[(1, [0, 0]), (2, [0, 1])]


Counter

Sets {1, 2, 3}

A set is like a list, but it is unordered and keeps no duplicates, so repetition doesn't matter: {1, 1, 2} == {1, 2}

Array and Matrix

a = [1] * 3
[1, 1, 1]

The following functions do the same thing (making a zero matrix of m by n)
def matrix1(m, n):
    '''Return an m-by-n matrix of zeros as a list of m row lists.

    Built one cell at a time with nested loops. range (rather than the
    Python-2-only xrange) keeps the function usable on Python 2 and 3.
    '''
    M = []
    for _ in range(m):
        row = []
        for _ in range(n):
            row.append(0)
        M.append(row)
    return M
    
def matrix2(m, n):
    '''Return an m-by-n matrix of zeros as a list of m row lists.

    Each row is created in one step with list repetition. range (rather
    than the Python-2-only xrange) keeps it usable on Python 2 and 3.
    '''
    M = []
    for _ in range(m):
        # A fresh [0] * n per iteration, so the rows are independent lists.
        M.append([0] * n)
    return M

def matrix3(m, n):
    '''Return an m-by-n matrix of zeros as a list of m row lists.

    Single list comprehension; the inner [0] * n is evaluated once per
    row, so the rows are independent lists. range (rather than the
    Python-2-only xrange) keeps it usable on Python 2 and 3.
    '''
    return [[0] * n for _ in range(m)]

Exceptions

def listDivision(l1, l2):
    '''Divide l1 by l2 element-wise.

    Assumes: l1 and l2 are lists of same size.
    Returns a list whose i-th entry is l1[i] / l2[i] as a float, a float
    NaN where l2[i] is zero, or the string 'Bad Arg' where the division
    failed for any other reason. A message is printed for each failure.
    '''
    result = []
    for n in range(len(l1)):
        try:
            result.append(l1[n] / float(l2[n]))
        except ZeroDivisionError as e:  # "as" form works on Python 2.6+ and 3
            result.append(float('NaN'))
            print(e)
        except Exception:
            # All other ordinary errors; unlike a bare "except:" this lets
            # KeyboardInterrupt and SystemExit propagate.
            result.append('Bad Arg')
            print('listDivision called with bad arg')
    return result
>>> listDivision([1,2,3],[4,5,6])
[0.25, 0.4, 0.5]
>>> listDivision([1,2,3],[4,5,0])
float division by zero
[0.25, 0.4, nan]

>>> listDivision([1,2,3],[4,5,'a'])
listDivision called with bad arg
[0.25, 0.4, 'Bad Arg']

Classes

class Coordinate(object):
    """Holds an x value and a y value."""

    def __init__(self, x0, y0):
        # Store both initial values on the instance in one step.
        self.x, self.y = x0, y0

c = Coordinate(3,4)
Origin = Coordinate(0,0)

frame
isinstance(c, Coordinate)
True

class Coordinate(object):
    """Holds an x and a y value, with a printable form and an x-difference helper."""

    def __init__(self, x0, y0):
        self.x, self.y = x0, y0

    def __str__(self):
        """Return the text form: x and y separated by a comma inside angle brackets."""
        parts = (str(self.x), str(self.y))
        return "<" + ",".join(parts) + ">"

    def xDifference(self, other):
        """Return this object's x minus other's x."""
        delta = self.x - other.x
        return delta

Generators

def genTest():
    """Generator that yields 1 and then 2."""
    for value in (1, 2):
        yield value
foo = genTest()
print foo.next()
print foo.next()
print foo.next()
#---------- alternatively
for n in foo:
    print n
1
2
StopIteration Exception

1
2

def genFib():
    """Generate Fibonacci numbers without end, starting 1, 2, 3, 5, 8, ...

    Values are produced lazily, so the caller decides how many to consume.
    """
    fibn_1 = 1  # fib(n-1)
    fibn_2 = 0  # fib(n-2)
    while True:
        # Called "current" rather than "next" so the builtin next() is not
        # shadowed inside the generator.
        current = fibn_1 + fibn_2
        yield current
        fibn_2 = fibn_1
        fibn_1 = current
fib = genFib()
for i in range(5):
    print fib.next()
1 2 3 5 8

Generator Expressions (an alternative to list comprehensions that saves memory)
The first example above can be written as follows
foo = (i for i in (1, 2))  # lazily yields 1 then 2, the same values as genTest()

a = xrange(3)
print list(a)
print tuple(a)
print set(a)
b=['a','b','c']
print dict(zip(a,b))
[0, 1, 2]
(0, 1, 2)
set([0, 1, 2])

{0: 'a', 1: 'b', 2: 'c'}

Misc.

print
a=['a', 'b', 'c']
for index, item in enumerate(a):
    print '%d. %s' % (index, item)


0. a
1. b
2. c

print "Case #{}: {}".format(1, 2)
Case #1: 2

Input
f = open('pathToFile', [option])
for line in f:  # iterate over the file object returned by open(), line by line
    print(line.rstrip('\n'))  # rstrip('\n') drops the trailing newline, which
                              # would otherwise print an empty line between lines

[option] can be 'r' (read) [default], 'w' (write), 'r+' (read and write) and 'a' (append).

Using with block, closes the file automatically afterwards:
with open('pathToFile', [option]) as f:
    do something

guess = 50
x = raw_input('Is your number ' + str(guess) + '?' + '\n Enter \'Yes\' or \'No\': ')

Is your number 50?
Enter 'Yes' or 'No':

copying
This explains the difference between reference assignment, shallow copying and deep copying.
[:] makes a shallow copy of a string or list. dic.copy() does the same for dictionaries.
If our list is a list of lists or list of objects (similar for dictionaries), then we need to do deep copying so that altering the copy doesn't alter the original:
import copy
list2 = copy.deepcopy(list1)

Divisions
9%2
9/2
-9/2
9.0/2
9/2.0
9.0/2.0
1
4
-5
4.5
4.5
4.5

*In Python 3.x 9/2 = 4.5. We can achieve this in Python 2.2+ with from __future__ import division;
Difference between == and is
The equality operator (x == y) tests the values of x and y for equality, while the identity operators (is) tests two objects to see whether they refer to the same object in memory. In general, it may be the case that x == y, but x is not y.
>>> a = [1, 2, 3]
>>> b = a
>>> b is a
True
>>> b == a
True
>>> a = [1, 2, 3]
>>> b = a[:]
>>> b is a
False
>>> b == a
True
Sorting a list with two rules
l=[(4,'mouse'),(1,'cat'),(3,'horse'),(2,'pog'),(3,'cow'),(2,'dog')]
l.sort(key=lambda x: (-x[0], x[1]))
[(4, 'mouse'), (3, 'cow'), (3, 'horse'), (2, 'dog'), (2, 'pog'), (1, 'cat')]

If an element is a class instance, we need to use dot notation. E.g.
l.sort(key=lambda x: (x[0].name, x[1].age))  # sort by the first item's name, then the second item's age
Measure the runtime of a program
import time
start = time.time()
[body]
end = time.time()
print end - start

Another way is from shell: time python [py file]

For small bits of Python code we can use timeit (it can be run from python interface as well):
python -m timeit [-n N] [-r N] [statement ...]
where -n N, (--number=N) denotes how many times to execute ‘statement’, and -r N, (--repeat=N) denotes how many times to repeat the timer (default 3).
Reference to the current time
import time
print time.strftime("%Y-%m-%d %H:%M:%S")

If we don't care about the representation, time.time() is good enough. It returns the value of time in seconds since the Epoch.
Random Numbers
import random

r_num1 = random.random()  # random number between 0 and 1
r_num2 = random.randrange(3, 9)  # random integer number between 3 and 8
random.shuffle(my_list)  # shuffles my_list in place; the call itself returns None
chosen_member = random.choice(my_list)  # chooses a member of the list
chosen_members = random.sample(my_list, 4)  # chooses 4 member of the list without replacement 
chosen_members_with_replacement = [random.choice(my_list) for _ in range(3)]

# use random.seed to get reproducible results
random.seed(5)
print random.random()  # 0.62290169489
random.seed(5)
print random.random()  # 0.62290169489

zip
zip(['a', 'b', 'c'], [1, 2, 3])  # [('a', 1), ('b', 2), ('c', 3)]
zip(('a', 1), ('b', 2), ('c', 3))  # [('a', 'b', 'c'), (1, 2, 3)]
data = [('a', 1), ('b', 2), ('c', 3)]
zip(*data)  # [('a', 'b', 'c'), (1, 2, 3)]

*args and **kwargs
The general use of these is to produce higher-order functions whose inputs can accept arbitrary arguments.
When we define a function using *args and **kwargs, args is a tuple of its unnamed arguments and kwargs is a dict of its named arguments:
def test(*args, **kwargs):
    print "unnamed args:", args
    print "keyword args:", kwargs

test(1, 2, key="word", key2="word2")  # unnamed args: (1, 2)
                                      # keyword args: {'key2': 'word2', 'key': 'word'}

It works the other way too:
def test2(x, y, z):
    return x + y + z

x_y_list = [1, 2]
z_dict = { "z" : 3 }
print test2(*x_y_list, **z_dict)  # 6

matplotlib
import matplotlib.pyplot as plt

plt.axis([x_start, x_end, y_start, y_end])
plt.xticks(tick_list)
plt.xlabel("x_Label")
plt.ylabel("y_label")
plt.title("Title")
plt.legend(loc=9)  # assuming 'label' is defined. 9 means top-center
plt.show()

Types of plots
#--- Line Chart ---
plt.plot(x_list, y_list, color='green', marker='o', linestyle='solid', label='my_label', linewidth=2)  
# The arguments color/marker/linestyle can be shortened, e.g. 'go-'. 
# full list of arguments can be found here.

#--- Bar Chart ---
plt.bar(x_list, y_list, bar_width)
# full list of arguments can be found here.

#--- Scatterplots ---
plt.scatter(x_list, y_list)

Annotation
x_list = [...]
y_list = [...]
labels = [...]

plt.scatter(x_list, y_list)

# label each point
for label, x_item, y_item in zip(labels, x_list, y_list):
    plt.annotate(label,
        xy=(x_item, y_item),  # put the label with its point
        xytext=(5, -10),  # but slightly offset
        textcoords='offset points')

# full list of arguments can be found here.

Misc.
plt.axis("scaled")  # Equal scaling by changing box dimensions
# full list of arguments can be found here.

sys

sys.argv is the list of command-line arguments. So sys.argv[0] is the name of the program itself