10. Performance comparison (Python & Numpy & Numba & Cython & C)#

The following program is a simulation of particle motion with the lenaard-jones potential, with n controlling the number of particles and iterations controlling the number of iterations. The article concludes with a comparison of the performance of the Python version, the vectorisation optimised version using Numpy, the optimised version using Cython, and the version using C.

Model of particle motion: the forces between particles utilise the Lennard Jones potential, whereby particles repel each other when they are less widely spaced and attract each other when they are more widely spaced.

10.1. Python (Py)#

The code below uses the Python language and is the original baseline version without any optimisations.

10.1.1. Py code#

import matplotlib.pyplot as plt
import timeit
import time
import numpy as np

# Used to randomly generate the initial positions of "n" particles.
# The horizontal and vertical coordinates of the generated particles are within (0, 2).
# Where a for loop is used to ensure that 
# the distance between particles is greater than 0.1
# to avoid confusion due to particles being too close together.

def particle_initial_position(n):
    np.random.seed(0)    #Make sure that the randomly generated particles are the same each time to make it easy to compare results.
    p = np.random.rand(n, 2) * 2
    
    #Ensure that the distance between particles is greater than 0.1
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.

def simulate_py(p, iterations, show_plot=False): 
#    p = particle_initial_position(n)    # Call the initial position of the "n" particles
    n = len(p)
    
    for _ in range(iterations):    # iterative loop
        fs = np.zeros(shape=(n, 2))  # forces from all other particles
        
        for i in range(n):
            for j in range(n):
                if i != j:
                    r = p[j] - p[i]    #the distance between particles, "r" is a vector.
                    dist = np.sqrt(np.sum(r**2))    #the distance between particles, "dist" is a scalar.
                    
                    # calculate the unit vector, i.e. to obtain the direction of the force
                    with np.errstate(invalid='ignore'):    # ignore cases where the denominator is zero
                        unit_vector_nan = r / dist    
                    unit_vector = np.nan_to_num(unit_vector_nan)
                    
                    # The Lennar-Jones potential's formula on force.(scalar)
                    epsilon = 1    # Parameters in the potential energy equation
                    sigma = 0.1    # Parameters in the potential energy equation
                    with np.errstate(invalid='ignore'): 
                        force_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist, 13) - 24 * epsilon * np.power(sigma, 6) / np.power(dist, 7)
                    force = np.nan_to_num(force_nan)
                    fs[i] += -force * unit_vector    #Transformation into vectors
        
        # calculating the displacement at time Δt
        x_delta = fs / 1 * 0.00001    # Δt is 0.00001.
        
        # Get the new position of particle "p" after displacement generation
        p = update_position(p, x_delta)
        
        # Plot
        pos = p
        colors = ['red', 'green', 'blue', 'orange']    # Colour the particles in plot
        if show_plot:
            if _ % 50 == 0:
                update_plot(pos,colors)
                
    # plot finally result
#    print("P({}): ".format(iterations), p)
#    return p



# is used to update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred

def update_position(p, delta_r):
    new_pos = p + delta_r
    return new_pos



# for plot particles
def update_plot(pos,color):
    plt.clf()
    xpos = pos[:, 0]
    ypos = pos[:, 1]
    N = len(pos)
    N_color = len(color)
    for i in range(N):
        plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
    plt.xlim(left=-0.1, right=2.1)
    plt.ylim(bottom=-0.1, top=2.1)
    plt.grid()
    plt.draw()
    plt.pause(0.0001)

10.1.2. Plot for Py#

Show the trajectory of the particle in the diagram:

%matplotlib
simulate_py(250, 1000, show_plot=True)
Using matplotlib backend: QtAgg

10.1.3. Execution time for Py#

Calculating execution time for python versions:

(Execution time is “compute_time_py”)

p = particle_initial_position(250)

start_time = time.time()

simulate_py(p, 1000)

end_time = time.time()

compute_time_py = end_time - start_time
print("simulate_py execution time:", compute_time_py)
simulate_py execution time: 5597.128479003906

10.2. Numpy (Np)#

The following code is optimised for vectorisation with numpy.

10.2.1. Np code(Num. of threads=4)#

import matplotlib.pyplot as plt
import time
import timeit
import numpy as np
from mkl import set_num_threads, get_max_threads
np.seterr(divide='ignore', invalid='ignore')


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py

def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.

def simulate_np_4(p, iterations,show_plot=False):
#    p = particle_initial_position(n)
    set_num_threads(4)
    n = len(p)

    for _ in range(iterations):
        
        # Calculate the spacing of the particles.
        rvs = (p[:, np.newaxis, :] - p[np.newaxis, :, :])    #the distance between particles, vector.
        dist = np.sqrt(np.sum(rvs**2, axis=-1))      ##the distance between particles, scalar.
        fs = np.zeros(shape=(n, 2))  # forces from all other particles

        dist_i = dist[:, :]
        rvs_i = rvs[:, :, :]

        # Calculate the unit vector, i.e. the direction of force
        with np.errstate(invalid='ignore'):
            unit_vectors_nan = rvs_i / dist_i[:, :, np.newaxis]
        unit_vectors = np.nan_to_num(unit_vectors_nan)

        dist_new = dist_i[:, :, np.newaxis]
        
        # Calculate the L-J potential force
        epsilon = 1
        sigma = 0.1
        with np.errstate(invalid='ignore'):
            fs_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist_new, 13)-24 * epsilon * np.power(sigma, 6) / np.power(dist_new, 7)
        fs = np.nan_to_num(fs_nan)*unit_vectors
                
        # Calculate the total force
        f_i = fs.sum(axis=1)
        
        # Calculate the displacement, delta t is 0.00001.
        x_delta = f_i / 1 * 0.00001
        
        # update position of particles
        p = update_position(p, x_delta)
        
        # Plot
        pos = p
        colors = ['red', 'green', 'blue', 'orange'] 
        if show_plot:
            if _ % 50 == 0:
                update_plot(pos,colors)
                
    # plot finally result
#    print("P({}): ".format(iterations), p)

#    return p



# update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred

def update_position(p, delta_r):
    new_pos = p + delta_r
    return new_pos



# Plot
def update_plot(pos,color):
    plt.clf()
    xpos = pos[:, 0]
    ypos = pos[:, 1]
    N = len(pos)
    N_color = len(color)
    for i in range(N):
        plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
    plt.xlim(left=-0.1, right=2.1)
    plt.ylim(bottom=-0.1, top=2.1)
    plt.grid()
    plt.draw()
    plt.pause(0.0001)

10.2.2. Execution time for Np (Num. of threads=4)#

Calculating execution time for numpy versions:

(Execution time is “compute_time_np”)

p = particle_initial_position(250)

start_time = time.time()

simulate_np_4(p, 1000)

end_time = time.time()
max_threads = get_max_threads()
print("max threads:", max_threads)
compute_time_np_4 = end_time - start_time
print("simulate_np execution time_4:", compute_time_np_4)
max threads: 4
simulate_np execution time_4: 11.439701557159424

10.2.3. Np code (Num. of threads=1)#

import matplotlib.pyplot as plt
import time
import timeit
import numpy as np
from mkl import set_num_threads, get_max_threads
np.seterr(divide='ignore', invalid='ignore')


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py

def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
# and "show_plot" controls whether or not to display the particle motion trajectories in the plot.

def simulate_np_1(p, iterations,show_plot=False):
#    p = particle_initial_position(n)
    set_num_threads(1)
    n = len(p)

    for _ in range(iterations):
        
        # Calculate the spacing of the particles.
        rvs = (p[:, np.newaxis, :] - p[np.newaxis, :, :])    #the distance between particles, vector.
        dist = np.sqrt(np.sum(rvs**2, axis=-1))      ##the distance between particles, scalar.
        fs = np.zeros(shape=(n, 2))  # forces from all other particles

        dist_i = dist[:, :]
        rvs_i = rvs[:, :, :]

        # Calculate the unit vector, i.e. the direction of force
        with np.errstate(invalid='ignore'):
            unit_vectors_nan = rvs_i / dist_i[:, :, np.newaxis]
        unit_vectors = np.nan_to_num(unit_vectors_nan)

        dist_new = dist_i[:, :, np.newaxis]
        
        # Calculate the L-J potential force
        epsilon = 1
        sigma = 0.1
        with np.errstate(invalid='ignore'):
            fs_nan = 48 * epsilon * np.power(sigma, 12) / np.power(dist_new, 13)-24 * epsilon * np.power(sigma, 6) / np.power(dist_new, 7)
        fs = np.nan_to_num(fs_nan)*unit_vectors
                
        # Calculate the total force
        f_i = fs.sum(axis=1)
        
        # Calculate the displacement, delta t is 0.00001.
        x_delta = f_i / 1 * 0.00001
        
        # update position of particles
        p = update_position(p, x_delta)
        
        # Plot
        pos = p
        colors = ['red', 'green', 'blue', 'orange'] 
        if show_plot:
            if _ % 50 == 0:
                update_plot(pos,colors)
                
    # plot finally result
#    print("P({}): ".format(iterations), p)


#    return p



# update the position of the particle.
# "delta_r" is the displacement produced in Δt,
# "p" is the position of the particle,
# "new_p" is the position of the particle after the displacement has occurred

def update_position(p, delta_r):
    new_pos = p + delta_r
    return new_pos



# Plot
def update_plot(pos,color):
    plt.clf()
    xpos = pos[:, 0]
    ypos = pos[:, 1]
    N = len(pos)
    N_color = len(color)
    for i in range(N):
        plt.plot(xpos[i], ypos[i], "o", color=color[i % N_color])
    plt.xlim(left=-0.1, right=2.1)
    plt.ylim(bottom=-0.1, top=2.1)
    plt.grid()
    plt.draw()
    plt.pause(0.0001)

10.2.4. Execution time for Np (Num. of threads=1)#

p = particle_initial_position(250)

start_time = time.time()

simulate_np_1(p, 1000)

end_time = time.time()
max_threads = get_max_threads()
print("max threads:", max_threads)
compute_time_np_1 = end_time - start_time
print("simulate_np execution time_1:", compute_time_np_1)
max threads: 1
simulate_np execution time_1: 10.788192510604858

10.3. Numba (Nb)#

Optimisation with Numba.

10.3.1. Nb code (Num. of threads=1)#

import matplotlib.pyplot as plt
import numpy as np
import numba as nb
import timeit
import time


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
@nb.jit
def simulate_nb_1(p, iterations):
    n = len(p)
    fs = np.zeros(shape=(n, 2))
    x_delta = np.zeros(shape=(n, 2))
    pos = p.copy()
    epsilon = 1
    sigma = 0.1
    for _ in range(iterations):
        fs[:, :] = 0.0

        for i in range(n):
            for j in range(n):
                if i != j:
                    # Calculate the distant, vector.
                    x = pos[j, 0] - pos[i, 0]
                    y = pos[j, 1] - pos[i, 1]
                    
                    # Calculate the distant, scalar.
                    dist = (x ** 2 + y ** 2) ** 0.5
                    
                    # Calculate the horizontal and vertical coordinates of the unit vector
                    ux = x / dist
                    uy = y / dist
                    
                    # Calculate the force, scalar.
                    force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
                    
                    # Convert the force into vector and multiply it in advance by delta(0.00001).
                    factor = 0.00001
                    fs[i, 0] += -force * ux * factor
                    fs[i, 1] += -force * uy * factor

        # Calculate the displacement.
        x_delta[:, :] = 0.0
        for i in range(n):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1.0

        # update the position of particles.
        pos = update_position(pos, x_delta)

    return pos



# update the position of particles.

@nb.jit
def update_position(p, delta_r, minimum=0, maximum=2):

    n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in range(n):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y

    return new_pos

10.3.2. Execution time for Nb (Num. of threads=1)#

Calculating execution time for numba versions:

(Execution time is “compute_time_nb_1”)

p = particle_initial_position(250)

start_time = time.time()

simulate_nb_1(p, 1000)

end_time = time.time()
compute_time_nb_1 = end_time - start_time
print("simulate_nb execution time_1:", compute_time_nb_1)
simulate_nb execution time_1: 2.1452345848083496

10.3.3. Nb code (Num. of threads=4)#

import timeit
import numpy as np
import numba as nb


# nb.config.NUMBA_NUM_THREADS = 4
np.random.seed(42)

nb.config.NUMBA_DEFAULT_NUM_THREADS

# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
@nb.njit(cache=True, parallel=True)
def particle_initial_position(n):
    p = np.random.rand(n, 2) * 2
    for i in nb.prange(n):
        while True:
            conflict = False
            for j in range(i):
                # distance = np.linalg.norm(p[i] - p[j])
                distance = np.sqrt(np.sum((p[i] - p[j])**2))
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed), 
@nb.njit(cache=True, parallel=True)
def simulate_nb_4(p, iterations):
    n = len(p)
    fs = np.zeros(shape=(n, 2))
    x_delta = np.zeros(shape=(n, 2))
    pos = p.copy()
    epsilon = 1
    sigma = 0.1
    test = 0
    print("Starting the main for loop!")
    for _ in range(iterations):
        fs[:, :] = 0.0
        for i in nb.prange(n):
            for j in range(n):
                if i != j:
                    # Calculate the distant, vector.
                    x = pos[j, 0] - pos[i, 0]
                    y = pos[j, 1] - pos[i, 1]
                    # Calculate the distant, scalar.
                    dist = (x ** 2 + y ** 2) ** 0.5
                    
                    # Calculate the horizontal and vertical coordinates of the unit vector
                    ux = x / dist
                    uy = y / dist
                    
                    # Calculate the force, scalar.
                    force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
                    
                    # Convert the force into vector and multiply it in advance by delta(0.00001).
                    factor = 0.00001
                    fs[i, 0] += -force * ux * factor
                    fs[i, 1] += -force * uy * factor

        # Calculate the displacement.
        x_delta[:, :] = 0.0
        for i in nb.prange(n):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1.0

        # update the position of particles.
        pos = update_position(pos, x_delta)

    return pos



# update the position of particles.

@nb.njit(cache=True, parallel=True)
def update_position(p, delta_r, minimum=0, maximum=2):

    n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in nb.prange(n):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y

    return new_pos

10.3.4. Execution time for Nb (Num. of threads=4)#

Calculating execution time for numba versions:

(Execution time is “compute_time_nb_4”)

nb.set_num_threads(4)

p = particle_initial_position(250)

start_time = time.time()

simulate_nb_4(p, 1000)

end_time = time.time()
compute_time_nb_4 = end_time - start_time
print("simulate_nb execution time_4:", compute_time_nb_4)
Starting the main for loop!
simulate_nb execution time_4: 0.44873714447021484

10.4. Cython (Cy)#

Optimisation with Cython.

10.4.1. Cy code (Num. of threads=1)#

number of threads is 1 by default.

%load_ext cython
%%cython

import pyximport
pyximport.install()

import matplotlib.pyplot as plt
import timeit
import time
import numpy as np
cimport numpy as cnp


# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p



# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy_1( cnp.ndarray[double, ndim=2] p, int iterations):
#    cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n)    # position of particles
    
    cdef Py_ssize_t n = len(p)
    
    cdef cnp.ndarray[double, ndim=2] fs    # force, vector
    cdef cnp.ndarray[double, ndim=1] r    # distance between paricles, vector
    cdef double dist    # distance between paricles, scalar
    cdef cnp.ndarray[double, ndim=1] unit_vector    # unit vector, the direction of force
    cdef double epsilon = 1
    cdef double sigma = 0.1
    cdef double force    # force, scalar
    cdef cnp.ndarray[double, ndim=2] x_delta    # displacement
    cdef double[:,::1] pos    # new position of particles
    cdef double x,y    # x-axis and y-axis

    
    for _ in range(iterations):
        x_delta = fs = np.zeros(shape=(n, 2))
        
        for i in range(n):
            for j in range(n):
                if i != j:
                    x = p[j,0] - p[i,0]
                    y = p[j,1] - p[i,1]
                    dist = (x**2+y**2)**0.5
                    
                    # unit vector is (ux,uy)
                    ux = x / dist
                    uy = y / dist
                    force = 48 * epsilon * (sigma**12) / (dist**13) - 24 * epsilon * (sigma**6) / (dist**7)
                    factor = 0.00001    # delta t
                    fs[i,0] += -force * ux * factor
                    fs[i,1] += -force * uy * factor

        x_delta = np.zeros(shape=(n, 2))
        for i in range(n):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1

        p = update_position(p, x_delta)
        pos = p
    
#    return p



cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
    cdef Py_ssize_t i
    cdef cnp.ndarray[double, ndim=2] new_pos
    cdef double x, y
    cdef Py_ssize_t n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in range(n):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y

    return new_pos

10.4.2. Execution time for Cy (Num. of threads=1)#

Calculating execution time for cython versions (number of threads is 1):

(Execution time is “compute_time_cy_1”)

p = particle_initial_position(250)

start_time = time.time()

simulate_cy_1(p, 1000)

end_time = time.time()

compute_time_cy_1 = end_time - start_time
print("simulate_cy execution time_1:", compute_time_cy_1)
simulate_cy execution time_1: 16.532904386520386

10.4.3. Cy code (Num. of threads=4)#

number of threads is 4.

%%cython --force -c=/openmp

import pyximport
pyximport.install()

import matplotlib.pyplot as plt
import timeit
import time
import numpy as np
cimport numpy as cnp
cimport cython
from cython.parallel import prange

# Used to randomly generate the initial positions of "n" particles.
# not optimised objects and therefore identical to Py
def particle_initial_position(n):
    np.random.seed(0)
    p = np.random.rand(n, 2) * 2
    for i in range(n):
        while True:
            conflict = False
            for j in range(i):
                distance = np.linalg.norm(p[i] - p[j])
                if distance <= 0.1:
                    conflict = True
                    break
            if not conflict:
                break
            p[i] = np.random.rand(2) * 2
    return p


# main function,
# "n" is the number of particles,
# "iterations" is the total number of iterations (how many Δt are computed).
cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef simulate_cy_4(cnp.ndarray[double, ndim=2] p, int iterations):
#    cdef cnp.ndarray[double, ndim=2] p = particle_initial_position(n)    # position of particles
    cdef Py_ssize_t n = len(p)
    
    cdef cnp.ndarray[double, ndim=2] fs = np.zeros(shape=(n, 2))    # force, vector
    cdef cnp.ndarray[double, ndim=2] x_delta = np.zeros(shape=(n, 2))    # displacement
    cdef cnp.ndarray[double, ndim=2] pos = p.copy()    # new position of particles
    cdef Py_ssize_t i, j
    cdef double x, y, dist, ux, uy, force, factor
    cdef double epsilon = 1
    cdef double sigma = 0.1
    
    for _ in range(iterations):
        fs[:, :] = 0.0

        for i in prange(n, num_threads=4, nogil=True):
            for j in range(n):
                if i != j:
                    x = pos[j, 0] - pos[i, 0]
                    y = pos[j, 1] - pos[i, 1]
                    dist = (x ** 2 + y ** 2) ** 0.5

                    ux = x / dist
                    uy = y / dist
                    force = 48 * epsilon * (sigma ** 12) / (dist ** 13) - 24 * epsilon * (sigma ** 6) / (dist ** 7)
                    factor = 0.00001
                    fs[i, 0] += -force * ux * factor
                    fs[i, 1] += -force * uy * factor

        x_delta[:, :] = 0.0
        for i in prange(n, num_threads=4, nogil=True):
            for j in range(2):
                x_delta[i, j] = fs[i, j] / 1.0

        pos = update_position(pos, x_delta)
    
    return pos




cimport cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cpdef update_position(cnp.ndarray[double, ndim=2] p, cnp.ndarray[double, ndim=2] delta_r):
    cdef Py_ssize_t i
    cdef cnp.ndarray[double, ndim=2] new_pos
    cdef double x, y
    cdef Py_ssize_t n = p.shape[0]

    new_pos = np.empty_like(p, dtype=np.float64)
    
    for i in prange(n, num_threads=4, nogil=True):
        x = p[i, 0] + delta_r[i, 0]
        y = p[i, 1] + delta_r[i, 1]

        new_pos[i, 0] = x
        new_pos[i, 1] = y
    return new_pos

10.4.4. Execution time for Cy (Num. of threads=4)#

Calculating execution time for cython versions (number of threads is 4):

(Execution time is “compute_time_cy_threads_4”)

p = particle_initial_position(250)

start_time = time.time()

simulate_cy_4(p, 1000)

end_time = time.time()

compute_time_cy_4 = end_time - start_time
print("simulate_cy execution time_4:", compute_time_cy_4)
simulate_cy execution time_4: 5.352380275726318

10.5. C#

Reconstructing particle motion models in C.

10.5.1. C code (Num. of threads=1)#

%%file executiontime.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

#define N 250       //Number of particles
#define EPSILON 1.0    //Parameters in the potential energy equation
#define SIGMA 0.1    //Parameters in the potential energy equation
#define T_DELTA 0.00001
#define ITERATIONS 1000

typedef struct {
    double x;
    double y;
} Vector;


//Randomly generate the initial positions of the particles and ensure that the distance between particles is greater than 0.1.
void initialize_particles(Vector p[N]) {
    srand(time(NULL));

    for (int i = 0; i < N; i++) {
        p[i].x = (double)rand() / RAND_MAX * 2;
        p[i].y = (double)rand() / RAND_MAX * 2;

        // Ensure that the spacing between particles is greater than 0.1
        for (int j = 0; j < i; j++) {
            double distance = sqrt(pow(p[j].x - p[i].x, 2) + pow(p[j].y - p[i].y, 2));
            if (distance <= 0.1) {
                i--;  // Regenerate coordinates
                break;
            }
        }
    }
}


// Calculate the force
void calculate_force(Vector p[N], Vector fs[N]) {
    for (int i = 0; i < N; i++) {
        fs[i].x = 0.0;
        fs[i].y = 0.0;
    }

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (i != j) {
                double r_x = p[j].x - p[i].x;
                double r_y = p[j].y - p[i].y;
                double dist = sqrt(r_x * r_x + r_y * r_y);

                double unit_vector_x = r_x / dist;
                double unit_vector_y = r_y / dist;

                double force = 48.0 * EPSILON * pow(SIGMA, 12) / pow(dist, 13) - 24.0 * EPSILON * pow(SIGMA, 6) / pow(dist, 7);

                fs[i].x += -force * unit_vector_x;
                fs[i].y += -force * unit_vector_y;
            }
        }
    }
}


// update position of particles.
void update_position(Vector p[N], Vector delta_r[N]) {
    for (int i = 0; i < N; i++) {
        p[i].x += delta_r[i].x;
        p[i].y += delta_r[i].y;
    }
}


// Print result
void print_positions(Vector p[N]) {
    for (int i = 0; i < N; i++) {
        printf("P(%d): [%.8f, %.8f]\n", i, p[i].x, p[i].y);
    }
}


// main function, call other functions.
int main(void) {
    clock_t start_time, end_time;
    double total_time;

    start_time = clock();
    Vector p[N];
    Vector fs[N];
    Vector delta_r[N];

    initialize_particles(p);

    for (int iter = 0; iter < ITERATIONS; iter++) {
        calculate_force(p, fs);

        for (int i = 0; i < N; i++) {
            delta_r[i].x = fs[i].x / 1 * T_DELTA;
            delta_r[i].y = fs[i].y / 1 * T_DELTA;
        }

        update_position(p, delta_r);
    }

    end_time = clock(); 

    total_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;

    printf("Code execution time:%f s\n", total_time);
//    print_positions(p);

    return 0;
}
Overwriting executiontime.c

10.5.2. Execution time for C (Num. of threads=1)#

Calculating execution time for C versions:

(Execution time is “simulate_c_execution_time_1”)

!gcc -Wall -pedantic -O3 -o executiontime executiontime.c
import time
t0 = time.time()
!executiontime
t1 = time.time()

compute_time_c_1 = t1 - t0
Code execution time:9.601000 s

10.5.3. C code (Num. of threads=4)#

%%file executiontime_4.c
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <time.h>
#include <math.h>

#define N 250
#define EPSILON 1.0
#define SIGMA 0.1
#define T_DELTA 0.00001
#define ITERATIONS 1000
#define NUM_THREADS 4 // 设置线程数量

typedef struct {
    double x;
    double y;
} Vector;

// 添加全局变量以共享数据
Vector p[N];
Vector fs[N];
Vector delta_r[N];
CRITICAL_SECTION criticalSection; // 定义互斥锁

void initialize_particles(Vector p[N]) {
    srand(time(NULL));

    for (int i = 0; i < N; i++) {
        p[i].x = (double)rand() / RAND_MAX * 2;
        p[i].y = (double)rand() / RAND_MAX * 2;

        for (int j = 0; j < i; j++) {
            double distance = sqrt(pow(p[j].x - p[i].x, 2) + pow(p[j].y - p[i].y, 2));
            if (distance <= 0.1) {
                i--;
                break;
            }
        }
    }
}

void calculate_force(Vector p[N], Vector fs[N], int start, int end) {
    for (int i = start; i < end; i++) {
        fs[i].x = 0.0;
        fs[i].y = 0.0;
    }

    for (int i = start; i < end; i++) {
        for (int j = 0; j < N; j++) {
            if (i != j) {
                double r_x = p[j].x - p[i].x;
                double r_y = p[j].y - p[i].y;
                double dist = sqrt(r_x * r_x + r_y * r_y);

                double unit_vector_x = r_x / dist;
                double unit_vector_y = r_y / dist;

                double force = 48.0 * EPSILON * pow(SIGMA, 12) / pow(dist, 13) - 24.0 * EPSILON * pow(SIGMA, 6) / pow(dist, 7);

                fs[i].x += -force * unit_vector_x;
                fs[i].y += -force * unit_vector_y;
            }
        }
    }
}

void update_position(Vector p[N], Vector delta_r[N], int start, int end) {
    for (int i = start; i < end; i++) {
        p[i].x += delta_r[i].x;
        p[i].y += delta_r[i].y;
    }
}

// 线程函数
DWORD WINAPI ThreadFunction(LPVOID lpParam) {
    int thread_id = (int)lpParam;
    int start, end;
    int particles_per_thread = N / NUM_THREADS;
    start = thread_id * particles_per_thread;
    end = (thread_id == NUM_THREADS - 1) ? N : (start + particles_per_thread);

    for (int iter = 0; iter < ITERATIONS; iter++) {
        calculate_force(p, fs, start, end);
        
        for (int i = start; i < end; i++) {
            delta_r[i].x = fs[i].x / 1 * T_DELTA;
            delta_r[i].y = fs[i].y / 1 * T_DELTA;
        }

        // 同步更新粒子位置
        EnterCriticalSection(&criticalSection);
        update_position(p, delta_r, start, end);
        LeaveCriticalSection(&criticalSection);
    }

    return 0;
}

int main(void) {
    clock_t start_time, end_time;
    double total_time;
    HANDLE threads[NUM_THREADS];

    // 初始化互斥锁
    InitializeCriticalSection(&criticalSection);

    start_time = clock();
    initialize_particles(p);

    for (int i = 0; i < NUM_THREADS; i++) {
        threads[i] = CreateThread(NULL, 0, ThreadFunction, (LPVOID)i, 0, NULL);
        if (threads[i] == NULL) {
            fprintf(stderr, "Error creating thread\n");
            return 1;
        }
    }

    // 等待线程完成
    WaitForMultipleObjects(NUM_THREADS, threads, TRUE, INFINITE);

    end_time = clock();

    total_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;

    printf("Code execution time:%f s\n", total_time);

    // 清理互斥锁
    DeleteCriticalSection(&criticalSection);

    return 0;
}
Overwriting executiontime_4.c

10.5.4. Execution time for C (Num. of threads=4)#

Calculating execution time for C versions:

(Execution time is “simulate_c_execution_time_4”)

!gcc -Wall -pedantic -O3 -o executiontime_4 executiontime_4.c
import time
t0 = time.time()
!executiontime_4
t1 = time.time()

compute_time_c_4 = t1 - t0
Code execution time:2.196000 s
print(compute_time_np_1)
10.788192510604858

10.6. Performance-comparison#

import pandas as pd
from IPython.display import HTML

data = {
    'Methods': ['Python', 'Numpy','Numpy','Numba', 'Numba','Cython','Cython', 'C','C'],
    'Number of thread': ['1', '1','4', '1','4', '1','4','1','4'],
    'Excution time(s)': [compute_time_py, compute_time_np_1,compute_time_np_4, compute_time_nb_1,compute_time_nb_4, compute_time_cy_1, compute_time_cy_4, compute_time_c_1,compute_time_c_4],
    'Speed up': [1, compute_time_py/compute_time_np_1, compute_time_py/compute_time_np_4, compute_time_py/compute_time_nb_1, compute_time_py/compute_time_nb_4, compute_time_py/compute_time_cy_1,compute_time_py/compute_time_cy_4,compute_time_py/compute_time_c_1,compute_time_py/compute_time_c_4]
}
df = pd.DataFrame(data)

# Creating style functions
def add_border(val):
    return 'border: 1px solid black'

# Applying style functions to data boxes
styled_df = df.style.applymap(add_border)

# Defining CSS styles
table_style = [
    {'selector': 'table', 'props': [('border-collapse', 'collapse')]},
    {'selector': 'th, td', 'props': [('border', '1px solid black')]}
]

# Adding styles to stylised data boxes
styled_df.set_table_styles(table_style)

# Displaying stylised data boxes in Jupyter Notebook
HTML(styled_df.to_html())
  Methods Number of thread Excution time(s) Speed up
0 Python 1 5597.128479 1.000000
1 Numpy 1 10.788193 518.819855
2 Numpy 4 11.439702 489.272246
3 Numba 1 2.145235 2609.098566
4 Numba 4 0.448737 12473.067024
5 Cython 1 16.532904 338.544780
6 Cython 4 5.352380 1045.726983
7 C 1 9.656067 579.648892
8 C 4 2.235951 2503.242699