Performance comparison (Python & Numpy & Numba & Cython & C)

10. Performance comparison (Python & Numpy & Numba & Cython & C)#

The following program is a simulation of particle motion with the lenaard-jones potential, with n controlling the number of particles and iterations controlling the number of iterations. The article concludes with a comparison of the performance of the Python version, the vectorisation optimised version using Numpy, the optimised version using Cython, and the version using C.

Model of particle motion: the forces between particles utilise the Lennard Jones potential, whereby particles repel each other when they are less widely spaced and attract each other when they are more widely spaced.

10.1. Python (Py)#

The code below uses the Python language and is the original baseline version without any optimisations.

10.1.1. Py code#

import matplotlib.pyplot as plt
import timeit
import numpy as np

# Used to randomly generate the initial positions of "n" particles.
# The horizontal and vertical coordinates of the generated particles are within (0, 2).
# Where a for loop is used to ensure that 
# the distance between particles is greater than 0.1
# to avoid confusion due to particles being too close together.

def particle_initial_position(n):
    np.random.seed(0)    #Make sure that the randomly generated particles are the same each time to make it easy to compare results.
    p = np.random.rand(n, 2) * 2
    
    #Ensure that the distance between particles is greater than 0.1
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.

def simulate_py(n, iterations, show_plot=False): 
    p = particle_initial_position(n)    # Call the initial position of the "n" particles
    
    for _ in range(iterations):    # iterative loop
        fs = np.zeros(shape=(n, 2))  # forces from all other particles
        
        for i in range(n):
            for j in range(n):
                if i != j:
                    r = p[j] - p[i]    #the distance between particles, "r" is a vector.
                    dist = np.sqrt(np.sum(r**2))    #the distance between particles, "dist" is a scalar.
                    
                    # calculate the unit vector, i.e. to obtain the direction of the force
                    with np.errstate(invalid='ignore'):    # ignore cases where the denominator is zero
                        unit_vector_nan = r / dist    
                    unit_vector = np.nan_to_num(unit_vector_nan)
                    
                    # The Lennar-Jones potential's formula on force.(scalar)
                    epsilon = 1    # Parameters in the potential energy equation
                    sigma = 0.1    # Parameters in the potential energy equation
                    with np.errstate(invalid='ignore'): 
                        force_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist, 13) - 24 * epsilon * np.power(sigma, 6) / np.power(dist, 7)
                    force = np.nan_to_num(force_nan)
                    fs[i] += -force * unit_vector    #Transformation into vectors
        
        # calculating the displacement at time Δt
        x_delta = fs / 1 * 0.00001    # Δt is 0.00001.
        
        # Get the new position of particle "p" after displacement generation
        p = update_position(p, x_delta)
        
        # Plot
        pos = p
        colors = ['red', 'green', 'blue', 'orange']    # Colour the particles in plot
        if show_plot:
            if _ % 50 == 0:
                update_plot(pos,colors)
                
    # plot finally result
#    print("P({}): ".format(iterations), p)
#    return p



# is used to update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred

def update_position(p, delta_r):
    new_pos = p + delta_r
    return new_pos



# for plot particles
def update_plot(pos,color):
    plt.clf()
    xpos = pos[:, 0]
    ypos = pos[:, 1]
    N = len(pos)
    N_color = len(color)
    for i in range(N):
        plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
    plt.xlim(left=-0.1, right=2.1)
    plt.ylim(bottom=-0.1, top=2.1)
    plt.grid()
    plt.draw()
    plt.pause(0.0001)

10.1.2. Plot for Py#

Show the trajectory of the particle in the diagram:

%matplotlib
simulate_py(250, 1000, show_plot=True)

Using matplotlib backend: QtAgg

10.1.3. Execution time for Py#

Calculating execution time for python versions:

(Execution time is “compute_time_py”)

compute_time_py = timeit.timeit(lambda: simulate_py(250, 1000, show_plot=False), number=1)
print("simulate_py execution time:", compute_time_py)

simulate_py execution time: 56.56827179999982

10.2. Numpy (Np)#

The following code is optimised for vectorisation with numpy.

10.2.1. Np code#

import matplotlib.pyplot as plt
import timeit
import numpy as np
np.seterr(divide='ignore', invalid='ignore')


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py

def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.

def simulate_np(n, iterations,show_plot=False):
    p = particle_initial_position(n)
    for _ in range(iterations):
        
        # Calculate the spacing of the particles.
        rvs = (p[:, np.newaxis, :] - p[np.newaxis, :, :])    #the distance between particles, vector.
        dist = np.sqrt(np.sum(rvs**2, axis=-1))      ##the distance between particles, scalar.
        fs = np.zeros(shape=(n, 2))  # forces from all other particles

        dist_i = dist[:, :]
        rvs_i = rvs[:, :, :]

        # Calculate the unit vector, i.e. the direction of force
        with np.errstate(invalid='ignore'):
            unit_vectors_nan = rvs_i / dist_i[:, :, np.newaxis]
        unit_vectors = np.nan_to_num(unit_vectors_nan)

        dist_new = dist_i[:, :, np.newaxis]
        
        # Calculate the L-J potential force
        epsilon = 1
        sigma = 0.1
        with np.errstate(invalid='ignore'):
            fs_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist_new, 13)-24 * epsilon * np.power(sigma, 6) / np.power(dist_new, 7)
        fs = np.nan_to_num(fs_nan)*unit_vectors
                
        # Calculate the total force
        f_i = fs.sum(axis=1)
        
        # Calculate the displacement, delta t is 0.00001.
        x_delta = f_i / 1 * 0.00001
        
        # update position of particles
        p = update_position(p, x_delta)
        
        # Plot
        pos = p
        colors = ['red', 'green', 'blue', 'orange'] 
        if show_plot:
            if _ % 50 == 0:
                update_plot(pos,colors)
                
    # plot finally result
#    print("P({}): ".format(iterations), p)
#    return p



# update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred

def update_position(p, delta_r):
    new_pos = p + delta_r
    return new_pos



# Plot
def update_plot(pos,color):
    plt.clf()
    xpos = pos[:, 0]
    ypos = pos[:, 1]
    N = len(pos)
    N_color = len(color)
    for i in range(N):
        plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
    plt.xlim(left=-0.1, right=2.1)
    plt.ylim(bottom=-0.1, top=2.1)
    plt.grid()
    plt.draw()
    plt.pause(0.0001)

10.2.2. Plot for Np#

Show the trajectory of the particle in the diagram:

%matplotlib
simulate_np(250, 1000, show_plot=True)

Using matplotlib backend: QtAgg

10.2.3. Execution time for Np#

Calculating execution time for numpy versions:

(Execution time is “compute_time_np”)

compute_time_np = timeit.timeit(lambda: simulate_np(250, 1000,show_plot=False), number=1)

print("simulate_np execution time:", compute_time_np)

simulate_np execution time: 1.413035000000491

10.3. Numba (Nb)#

Optimisation with Numba.

10.3.1. Nb code#

import matplotlib.pyplot as plt
import timeit
import numpy as np
import numba as nb


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
@nb.jit
def simulate_nb(p, iterations):
    n = len(p)
    fs = np.zeros(shape=(n, 2))
    x_delta = np.zeros(shape=(n, 2))
    pos = p.copy()
    epsilon = 1
    sigma = 0.1
    
    for _ in range(iterations):
        fs[:, :] = 0.0

        for i in range(n):
            for j in range(n):
                if i != j:
                    # Calculate the distant, vector.
                    x = pos[j, 0] - pos[i, 0]
                    y = pos[j, 1] - pos[i, 1]
                    
                    # Calculate the distant, scalar.
                    dist = (x ** 2 + y ** 2) ** 0.5
                    
                    # Calculate the horizontal and vertical coordinates of the unit vector
                    ux = x / dist
                    uy = y / dist
                    
                    # Calculate the force, scalar.
                    force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
                    
                    # Convert the force into vector and multiply it in advance by delta(0.00001).
                    factor = 0.00001
                    fs[i, 0] += -force * ux * factor
                    fs[i, 1] += -force * uy * factor

        # Calculate the displacement.
        x_delta[:, :] = 0.0
        for i in range(n):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1.0

        # update the position of particles.
        pos = update_position(pos, x_delta)

    return pos



# update the position of particles.

@nb.jit
def update_position(p, delta_r, minimum=0, maximum=2):

    n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in range(n):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y

    return new_pos

10.3.2. Execution time for Nb#

Calculating execution time for numba versions:

(Execution time is “compute_time_nb”)

compute_time_nb = timeit.timeit(lambda: simulate_nb(particle_initial_position(250), 1000), number=1)

print("simulate_nb execution time:", compute_time_nb)

simulate_nb execution time: 2.498092000001634

10.4. Cython (Cy)#

Optimisation with Cython.

10.4.1. Cy code#

number of threads is 1 by default.

%load_ext cython

%%cython

import pyximport
pyximport.install()

import matplotlib.pyplot as plt
import timeit
import numpy as np
cimport numpy as cnp


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy( Py_ssize_t n, int iterations):
    cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n)    # position of particles
    cdef cnp.ndarray[double, ndim=2] fs    # force, vector
    cdef cnp.ndarray[double, ndim=1] r    # distance between paricles, vector
    cdef double dist    # distance between paricles, scalar
    cdef cnp.ndarray[double, ndim=1] unit_vector    # unit vector, the direction of force
    cdef double epsilon = 1
    cdef double sigma = 0.1
    cdef double force    # force, scalar
    cdef cnp.ndarray[double, ndim=2] x_delta    # displacement
    cdef double[:,::1] pos    # new position of particles
    cdef double x,y    # x-axis and y-axis

    
    for _ in range(iterations):
        x_delta = fs = np.zeros(shape=(n, 2))
        
        for i in range(n):
            for j in range(n):
                if i != j:
                    x = p[j,0] - p[i,0]
                    y = p[j,1] - p[i,1]
                    dist = (x**2+y**2)**0.5
                    
                    # unit vector is (ux,uy)
                    ux = x / dist
                    uy = y / dist
                    force = 48 * epsilon * (sigma**12) / (dist**13) - 24 * epsilon * (sigma**6) / (dist**7)
                    factor = 0.00001    # delta t
                    fs[i,0] += -force * ux * factor
                    fs[i,1] += -force * uy * factor

        x_delta = np.zeros(shape=(n, 2))
        for i in range(n):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1

        p = update_position(p, x_delta)
        pos = p
    
    return p



cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
    cdef Py_ssize_t i
    cdef cnp.ndarray[double, ndim=2] new_pos
    cdef double x, y
    cdef Py_ssize_t n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in range(n):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y

    return new_pos

10.4.2. Execution time for Cy#

Calculating execution time for cython versions (number of threads is 4):

(Execution time is “compute_time_cy”)

compute_time_cy = timeit.timeit(lambda: simulate_cy(250, 1000), number=1)
print("simulate_cy execution time:", compute_time_cy)

simulate_cy execution time: 18.137456100001145

10.4.3. Cy_threads_4 code#

number of threads is 4.

%%cython --force -c=/openmp

import pyximport
pyximport.install()

import matplotlib.pyplot as plt
import timeit
import numpy as np
cimport numpy as cnp
cimport cython
from cython.parallel import prange

# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p


# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy_threads_4(Py_ssize_t n, int iterations):
    cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n)    # position of particles
    cdef cnp.ndarray[double, ndim=2] fs = np.zeros(shape=(n, 2))    # force, vector
    cdef cnp.ndarray[double, ndim=2] x_delta = np.zeros(shape=(n, 2))    # displacement
    cdef cnp.ndarray[double, ndim=2] pos = p.copy()    # new position of particles
    cdef Py_ssize_t i, j
    cdef double x, y, dist, ux, uy, force, factor
    cdef double epsilon = 1
    cdef double sigma = 0.1
    
    for _ in range(iterations):
        fs[:, :] = 0.0

        for i in prange(n, num_threads=4, nogil=True):
            for j in range(n):
                if i != j:
                    x = pos[j, 0] - pos[i, 0]
                    y = pos[j, 1] - pos[i, 1]
                    dist = (x ** 2 + y ** 2) ** 0.5

                    ux = x / dist
                    uy = y / dist
                    force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
                    factor = 0.00001
                    fs[i, 0] += -force * ux * factor
                    fs[i, 1] += -force * uy * factor

        x_delta[:, :] = 0.0
        for i in prange(n, num_threads=4, nogil=True):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1.0

        pos = update_position(pos, x_delta)

    return pos




cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
    cdef Py_ssize_t i
    cdef cnp.ndarray[double, ndim=2] new_pos
    cdef double x, y
    cdef Py_ssize_t n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in prange(n, num_threads=4, nogil=True):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y
    return new_pos

10.4.4. Execution time for Cy_threads_4#