10. Performance comparison (Python & Numpy & Numba & Cython & C)#
The following program is a simulation of particle motion with the lenaard-jones potential, with n controlling the number of particles and iterations controlling the number of iterations. The article concludes with a comparison of the performance of the Python version, the vectorisation optimised version using Numpy, the optimised version using Cython, and the version using C.
Model of particle motion: the forces between particles utilise the Lennard Jones potential, whereby particles repel each other when they are less widely spaced and attract each other when they are more widely spaced.
10.1. Python (Py)#
The code below uses the Python language and is the original baseline version without any optimisations.
10.1.1. Py code#
import matplotlib.pyplot as plt
import timeit
import numpy as np
# Used to randomly generate the initial positions of "n" particles.
# The horizontal and vertical coordinates of the generated particles are within (0, 2).
# Where a for loop is used to ensure that
# the distance between particles is greater than 0.1
# to avoid confusion due to particles being too close together.
def particle_initial_position(n):
np.random.seed(0) #Make sure that the randomly generated particles are the same each time to make it easy to compare results.
p = np.random.rand(n, 2) * 2
#Ensure that the distance between particles is greater than 0.1
for i in range(n):
while True:
conflict = False
for j in range(i):
distance = np.linalg.norm(p[i] - p[j])
if distance <= 0.1:
conflict = True
break
if not conflict:
break
p[i] = np.random.rand(2) * 2
return p
# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed),
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.
def simulate_py(n, iterations, show_plot=False):
p = particle_initial_position(n) # Call the initial position of the "n" particles
for _ in range(iterations): # iterative loop
fs = np.zeros(shape=(n, 2)) # forces from all other particles
for i in range(n):
for j in range(n):
if i != j:
r = p[j] - p[i] #the distance between particles, "r" is a vector.
dist = np.sqrt(np.sum(r**2)) #the distance between particles, "dist" is a scalar.
# calculate the unit vector, i.e. to obtain the direction of the force
with np.errstate(invalid='ignore'): # ignore cases where the denominator is zero
unit_vector_nan = r / dist
unit_vector = np.nan_to_num(unit_vector_nan)
# The Lennar-Jones potential's formula on force.(scalar)
epsilon = 1 # Parameters in the potential energy equation
sigma = 0.1 # Parameters in the potential energy equation
with np.errstate(invalid='ignore'):
force_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist, 13) - 24 * epsilon * np.power(sigma, 6) / np.power(dist, 7)
force = np.nan_to_num(force_nan)
fs[i] += -force * unit_vector #Transformation into vectors
# calculating the displacement at time Δt
x_delta = fs / 1 * 0.00001 # Δt is 0.00001.
# Get the new position of particle "p" after displacement generation
p = update_position(p, x_delta)
# Plot
pos = p
colors = ['red', 'green', 'blue', 'orange'] # Colour the particles in plot
if show_plot:
if _ % 50 == 0:
update_plot(pos,colors)
# plot finally result
# print("P({}): ".format(iterations), p)
# return p
# is used to update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred
def update_position(p, delta_r):
new_pos = p + delta_r
return new_pos
# for plot particles
def update_plot(pos,color):
plt.clf()
xpos = pos[:, 0]
ypos = pos[:, 1]
N = len(pos)
N_color = len(color)
for i in range(N):
plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
plt.xlim(left=-0.1, right=2.1)
plt.ylim(bottom=-0.1, top=2.1)
plt.grid()
plt.draw()
plt.pause(0.0001)
10.1.2. Plot for Py#
Show the trajectory of the particle in the diagram:
%matplotlib
simulate_py(250, 1000, show_plot=True)
Using matplotlib backend: QtAgg
10.1.3. Execution time for Py#
Calculating execution time for python versions:
(Execution time is “compute_time_py”)
compute_time_py = timeit.timeit(lambda: simulate_py(250, 1000, show_plot=False), number=1)
print("simulate_py execution time:", compute_time_py)
simulate_py execution time: 56.56827179999982
10.2. Numpy (Np)#
The following code is optimised for vectorisation with numpy.
10.2.1. Np code#
import matplotlib.pyplot as plt
import timeit
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
np.random.seed(0)
p = np.random.rand(n, 2) * 2
for i in range(n):
while True:
conflict = False
for j in range(i):
distance = np.linalg.norm(p[i] - p[j])
if distance <= 0.1:
conflict = True
break
if not conflict:
break
p[i] = np.random.rand(2) * 2
return p
# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed),
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.
def simulate_np(n, iterations,show_plot=False):
p = particle_initial_position(n)
for _ in range(iterations):
# Calculate the spacing of the particles.
rvs = (p[:, np.newaxis, :] - p[np.newaxis, :, :]) #the distance between particles, vector.
dist = np.sqrt(np.sum(rvs**2, axis=-1)) ##the distance between particles, scalar.
fs = np.zeros(shape=(n, 2)) # forces from all other particles
dist_i = dist[:, :]
rvs_i = rvs[:, :, :]
# Calculate the unit vector, i.e. the direction of force
with np.errstate(invalid='ignore'):
unit_vectors_nan = rvs_i / dist_i[:, :, np.newaxis]
unit_vectors = np.nan_to_num(unit_vectors_nan)
dist_new = dist_i[:, :, np.newaxis]
# Calculate the L-J potential force
epsilon = 1
sigma = 0.1
with np.errstate(invalid='ignore'):
fs_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist_new, 13)-24 * epsilon * np.power(sigma, 6) / np.power(dist_new, 7)
fs = np.nan_to_num(fs_nan)*unit_vectors
# Calculate the total force
f_i = fs.sum(axis=1)
# Calculate the displacement, delta t is 0.00001.
x_delta = f_i / 1 * 0.00001
# update position of particles
p = update_position(p, x_delta)
# Plot
pos = p
colors = ['red', 'green', 'blue', 'orange']
if show_plot:
if _ % 50 == 0:
update_plot(pos,colors)
# plot finally result
# print("P({}): ".format(iterations), p)
# return p
# update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred
def update_position(p, delta_r):
new_pos = p + delta_r
return new_pos
# Plot
def update_plot(pos,color):
plt.clf()
xpos = pos[:, 0]
ypos = pos[:, 1]
N = len(pos)
N_color = len(color)
for i in range(N):
plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
plt.xlim(left=-0.1, right=2.1)
plt.ylim(bottom=-0.1, top=2.1)
plt.grid()
plt.draw()
plt.pause(0.0001)
10.2.2. Plot for Np#
Show the trajectory of the particle in the diagram:
%matplotlib
simulate_np(250, 1000, show_plot=True)
Using matplotlib backend: QtAgg
10.2.3. Execution time for Np#
Calculating execution time for numpy versions:
(Execution time is “compute_time_np”)
compute_time_np = timeit.timeit(lambda: simulate_np(250, 1000,show_plot=False), number=1)
print("simulate_np execution time:", compute_time_np)
simulate_np execution time: 1.413035000000491
10.3. Numba (Nb)#
Optimisation with Numba.
10.3.1. Nb code#
import matplotlib.pyplot as plt
import timeit
import numpy as np
import numba as nb
# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
np.random.seed(0)
p = np.random.rand(n, 2) * 2
for i in range(n):
while True:
conflict = False
for j in range(i):
distance = np.linalg.norm(p[i] - p[j])
if distance <= 0.1:
conflict = True
break
if not conflict:
break
p[i] = np.random.rand(2) * 2
return p
# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed),
@nb.jit
def simulate_nb(p, iterations):
n = len(p)
fs = np.zeros(shape=(n, 2))
x_delta = np.zeros(shape=(n, 2))
pos = p.copy()
epsilon = 1
sigma = 0.1
for _ in range(iterations):
fs[:, :] = 0.0
for i in range(n):
for j in range(n):
if i != j:
# Calculate the distant, vector.
x = pos[j, 0] - pos[i, 0]
y = pos[j, 1] - pos[i, 1]
# Calculate the distant, scalar.
dist = (x ** 2 + y ** 2) ** 0.5
# Calculate the horizontal and vertical coordinates of the unit vector
ux = x / dist
uy = y / dist
# Calculate the force, scalar.
force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
# Convert the force into vector and multiply it in advance by delta(0.00001).
factor = 0.00001
fs[i, 0] += -force * ux * factor
fs[i, 1] += -force * uy * factor
# Calculate the displacement.
x_delta[:, :] = 0.0
for i in range(n):
for j in range(2):
x_delta[i, j] = fs[i, j] / 1.0
# update the position of particles.
pos = update_position(pos, x_delta)
return pos
# update the position of particles.
@nb.jit
def update_position(p, delta_r, minimum=0, maximum=2):
n = p.shape[0]
new_pos = np.empty_like(p, dtype=np.float64)
for i in range(n):
x = p[i, 0] + delta_r[i, 0]
y = p[i, 1] + delta_r[i, 1]
new_pos[i, 0] = x
new_pos[i, 1] = y
return new_pos
10.3.2. Execution time for Nb#
Calculating execution time for numba versions:
(Execution time is “compute_time_nb”)
compute_time_nb = timeit.timeit(lambda: simulate_nb(particle_initial_position(250), 1000), number=1)
print("simulate_nb execution time:", compute_time_nb)
simulate_nb execution time: 2.498092000001634
10.4. Cython (Cy)#
Optimisation with Cython.
10.4.1. Cy code#
number of threads is 1 by default.
%load_ext cython
%%cython
import pyximport
pyximport.install()
import matplotlib.pyplot as plt
import timeit
import numpy as np
cimport numpy as cnp
# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
np.random.seed(0)
p = np.random.rand(n, 2) * 2
for i in range(n):
while True:
conflict = False
for j in range(i):
distance = np.linalg.norm(p[i] - p[j])
if distance <= 0.1:
conflict = True
break
if not conflict:
break
p[i] = np.random.rand(2) * 2
return p
# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy( Py_ssize_t n, int iterations):
cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n) # position of particles
cdef cnp.ndarray[double, ndim=2] fs # force, vector
cdef cnp.ndarray[double, ndim=1] r # distance between paricles, vector
cdef double dist # distance between paricles, scalar
cdef cnp.ndarray[double, ndim=1] unit_vector # unit vector, the direction of force
cdef double epsilon = 1
cdef double sigma = 0.1
cdef double force # force, scalar
cdef cnp.ndarray[double, ndim=2] x_delta # displacement
cdef double[:,::1] pos # new position of particles
cdef double x,y # x-axis and y-axis
for _ in range(iterations):
x_delta = fs = np.zeros(shape=(n, 2))
for i in range(n):
for j in range(n):
if i != j:
x = p[j,0] - p[i,0]
y = p[j,1] - p[i,1]
dist = (x**2+y**2)**0.5
# unit vector is (ux,uy)
ux = x / dist
uy = y / dist
force = 48 * epsilon * (sigma**12) / (dist**13) - 24 * epsilon * (sigma**6) / (dist**7)
factor = 0.00001 # delta t
fs[i,0] += -force * ux * factor
fs[i,1] += -force * uy * factor
x_delta = np.zeros(shape=(n, 2))
for i in range(n):
for j in range(2):
x_delta[i, j] = fs[i, j] / 1
p = update_position(p, x_delta)
pos = p
return p
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
cdef Py_ssize_t i
cdef cnp.ndarray[double, ndim=2] new_pos
cdef double x, y
cdef Py_ssize_t n = p.shape[0]
new_pos = np.empty_like(p, dtype=np.float64)
for i in range(n):
x = p[i, 0] + delta_r[i, 0]
y = p[i, 1] + delta_r[i, 1]
new_pos[i, 0] = x
new_pos[i, 1] = y
return new_pos
10.4.2. Execution time for Cy#
Calculating execution time for cython versions (number of threads is 4):
(Execution time is “compute_time_cy”)
compute_time_cy = timeit.timeit(lambda: simulate_cy(250, 1000), number=1)
print("simulate_cy execution time:", compute_time_cy)
simulate_cy execution time: 18.137456100001145
10.4.3. Cy_threads_4 code#
number of threads is 4.
%%cython --force -c=/openmp
import pyximport
pyximport.install()
import matplotlib.pyplot as plt
import timeit
import numpy as np
cimport numpy as cnp
cimport cython
from cython.parallel import prange
# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
np.random.seed(0)
p = np.random.rand(n, 2) * 2
for i in range(n):
while True:
conflict = False
for j in range(i):
distance = np.linalg.norm(p[i] - p[j])
if distance <= 0.1:
conflict = True
break
if not conflict:
break
p[i] = np.random.rand(2) * 2
return p
# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy_threads_4(Py_ssize_t n, int iterations):
cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n) # position of particles
cdef cnp.ndarray[double, ndim=2] fs = np.zeros(shape=(n, 2)) # force, vector
cdef cnp.ndarray[double, ndim=2] x_delta = np.zeros(shape=(n, 2)) # displacement
cdef cnp.ndarray[double, ndim=2] pos = p.copy() # new position of particles
cdef Py_ssize_t i, j
cdef double x, y, dist, ux, uy, force, factor
cdef double epsilon = 1
cdef double sigma = 0.1
for _ in range(iterations):
fs[:, :] = 0.0
for i in prange(n, num_threads=4, nogil=True):
for j in range(n):
if i != j:
x = pos[j, 0] - pos[i, 0]
y = pos[j, 1] - pos[i, 1]
dist = (x ** 2 + y ** 2) ** 0.5
ux = x / dist
uy = y / dist
force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
factor = 0.00001
fs[i, 0] += -force * ux * factor
fs[i, 1] += -force * uy * factor
x_delta[:, :] = 0.0
for i in prange(n, num_threads=4, nogil=True):
for j in range(2):
x_delta[i, j] = fs[i, j] / 1.0
pos = update_position(pos, x_delta)
return pos
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
cdef Py_ssize_t i
cdef cnp.ndarray[double, ndim=2] new_pos
cdef double x, y
cdef Py_ssize_t n = p.shape[0]
new_pos = np.empty_like(p, dtype=np.float64)
for i in prange(n, num_threads=4, nogil=True):
x = p[i, 0] + delta_r[i, 0]
y = p[i, 1] + delta_r[i, 1]
new_pos[i, 0] = x
new_pos[i, 1] = y
return new_pos
10.4.4. Execution time for Cy_threads_4#
Calculating execution time for cython versions (number of threads is 4):
(Execution time is “compute_time_cy_threads_4”)
compute_time_cy_threads_4 = timeit.timeit(lambda: simulate_cy_threads_4(250, 1000), number=1)
print("simulate_cy execution time:", compute_time_cy_threads_4)
simulate_cy execution time: 7.235822800001188
10.5. C#
Reconstructing particle motion models in C.
10.5.1. C code#
%%file executiontime.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#define N 250 //Number of particles
#define EPSILON 1.0 //Parameters in the potential energy equation
#define SIGMA 0.1 //Parameters in the potential energy equation
#define T_DELTA 0.00001
#define ITERATIONS 1000
typedef struct {
double x;
double y;
} Vector;
//Randomly generate the initial positions of the particles and ensure that the distance between particles is greater than 0.1.
void initialize_particles(Vector p[N]) {
srand(time(NULL));
for (int i = 0; i < N; i++) {
p[i].x = (double)rand() / RAND_MAX * 2;
p[i].y = (double)rand() / RAND_MAX * 2;
// Ensure that the spacing between particles is greater than 0.1
for (int j = 0; j < i; j++) {
double distance = sqrt(pow(p[j].x - p[i].x, 2) + pow(p[j].y - p[i].y, 2));
if (distance <= 0.1) {
i--; // Regenerate coordinates
break;
}
}
}
}
// Calculate the force
void calculate_force(Vector p[N], Vector fs[N]) {
for (int i = 0; i < N; i++) {
fs[i].x = 0.0;
fs[i].y = 0.0;
}
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
if (i != j) {
double r_x = p[j].x - p[i].x;
double r_y = p[j].y - p[i].y;
double dist = sqrt(r_x * r_x + r_y * r_y);
double unit_vector_x = r_x / dist;
double unit_vector_y = r_y / dist;
double force = 48.0 * EPSILON * pow(SIGMA, 12) / pow(dist, 13) - 24.0 * EPSILON * pow(SIGMA, 6) / pow(dist, 7);
fs[i].x += -force * unit_vector_x;
fs[i].y += -force * unit_vector_y;
}
}
}
}
// update position of particles.
void update_position(Vector p[N], Vector delta_r[N]) {
for (int i = 0; i < N; i++) {
p[i].x += delta_r[i].x;
p[i].y += delta_r[i].y;
}
}
// Print result
void print_positions(Vector p[N]) {
for (int i = 0; i < N; i++) {
printf("P(%d): [%.8f, %.8f]\n", i, p[i].x, p[i].y);
}
}
// main function, call other functions.
int main(void) {
clock_t start_time, end_time;
double total_time;
start_time = clock();
Vector p[N];
Vector fs[N];
Vector delta_r[N];
initialize_particles(p);
for (int iter = 0; iter < ITERATIONS; iter++) {
calculate_force(p, fs);
for (int i = 0; i < N; i++) {
delta_r[i].x = fs[i].x / 1 * T_DELTA;
delta_r[i].y = fs[i].y / 1 * T_DELTA;
}
update_position(p, delta_r);
}
end_time = clock();
total_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;
printf("Code execution time:%f s\n", total_time);
// print_positions(p);
return 0;
}
Overwriting executiontime.c
10.5.2. Execution time for C#
Calculating execution time for C versions:
(Execution time is “simulate_c_execution_time”)
!gcc -Wall -pedantic -O3 -o executiontime executiontime.c
import time
t0 = time.time()
!executiontime
t1 = time.time()
simulate_c_execution_time = t1 - t0
Code execution time:9.778000 s
10.6. Performance-comparison#
import pandas as pd
from IPython.display import HTML
data = {
'Methods': ['Python', 'Numpy','Numba', 'Cython','Cython(threads=4)', 'C'],
'Excution time(s)': [compute_time_py, compute_time_np, compute_time_nb, compute_time_cy, compute_time_cy_threads_4, simulate_c_execution_time],
'Speed up': [1, compute_time_py/compute_time_np, compute_time_py/compute_time_nb, compute_time_py/compute_time_cy, compute_time_py/compute_time_cy_threads_4, compute_time_py/simulate_c_execution_time]
}
df = pd.DataFrame(data)
# Creating style functions
def add_border(val):
return 'border: 1px solid black'
# Applying style functions to data boxes
styled_df = df.style.applymap(add_border)
# Defining CSS styles
table_style = [
{'selector': 'table', 'props': [('border-collapse', 'collapse')]},
{'selector': 'th, td', 'props': [('border', '1px solid black')]}
]
# Adding styles to stylised data boxes
styled_df.set_table_styles(table_style)
# Displaying stylised data boxes in Jupyter Notebook
HTML(styled_df.to_html())
Methods | Excution time(s) | Speed up | |
---|---|---|---|
0 | Python | 5509.530889 | 1.000000 |
1 | Numpy | 13.338579 | 413.052312 |
2 | Numba | 3.197772 | 1722.928045 |
3 | Cython | 18.080679 | 304.719250 |
4 | Cython(threads=4) | 7.384355 | 746.108646 |
5 | C | 10.942000 | 503.521375 |