Fluid fun with py5 and NumPy

Hello! I felt like sketching this afternoon, so I dusted off my old implementation of Stam’s Real-Time Fluid Dynamics for Games. The original version used pygame and NumPy – turns out porting it to py5 was straightforward. Here’s a screenshot:

And here’s the code:

import py5
import numpy as np


def remap(a, low0, high0, low1, high1):
    return a * (high1 - low1) / (high0 - low0)


def normalize(a):
    a_min = np.min(a)
    a_max = np.max(a)
    return remap(a, a_min, a_max, 0, 1)


def add_source(field, source, dt):
    field += dt * source


def set_boundary(boundary, field):
    if boundary == "u":
        # Left column.
        field[1:-1, 0] = -field[1:-1, 1]
        # Right column.
        field[1:-1, -1] = -field[1:-1, -2]
    else:
        # Left column.
        field[1:-1, 0] = field[1:-1, 1]
        # Right column.
        field[1:-1, -1] = field[1:-1, -2]

    if boundary == "v":
        # Bottom row.
        field[-1, 1:-1] = -field[-2, 1:-1]
        # Top row.
        field[0, 1:-1] = -field[1, 1:-1]
    else:
        # Bottom row.
        field[-1, 1:-1] = field[-2, 1:-1]
        # Top row.
        field[0, 1:-1] = field[1, 1:-1]

    # Bottom-left corner.
    field[-1, 0] = 0.5 * (field[-2, 0] + field[-1, 1])
    # Top-left corner.
    field[0, 0] = 0.5 * (field[1, 0] + field[0, 1])
    # Bottom-right corner.
    field[-1, -1] = 0.5 * (field[-1, -2] + field[-2, -1])
    # Top-right corner.
    field[0, -1] = 0.5 * (field[0, -2] + field[1, -1])


def diffuse(boundary, field, field_prev, diff, dt):
    N = field.shape[0] - 2
    a = dt * diff * N * N

    # Solve the system with Gauss-Seidel.
    for _ in range(20):
        # Get the field values at the neighboring grid cells.
        left = field_prev[1:-1, 0:-2]
        right = field_prev[1:-1, 2:]
        top = field_prev[0:-2, 1:-1]
        bottom = field_prev[2:, 1:-1]
        neighbors = left + right + top + bottom
        # Update the field.
        field[1:-1, 1:-1] = (field_prev[1:-1, 1:-1] + a * neighbors) / (1 + 4 * a)

    # Update the boundaries.
    set_boundary(boundary, field)


def advect(boundary, field, field_prev, u, v, dt):
    N = field.shape[0] - 2
    dt0 = dt * N

    # Create a coordinate grid.
    x = np.arange(1, N + 1)
    y = np.arange(1, N + 1)
    X, Y = np.meshgrid(x, y)

    # Calculate the coordinates of particles' points of origin.
    xo = X - dt0 * u[1:-1, 1:-1]
    yo = Y - dt0 * v[1:-1, 1:-1]

    # Constrain the coordinates to the grid interior.
    xo = np.clip(xo, 0.5, N + 0.5)
    yo = np.clip(yo, 0.5, N + 0.5)

    # Calculate the indices of the grid cells surrounding the
    # point of origin.
    xl = xo.astype(int)
    xr = xl + 1
    yt = yo.astype(int)
    yb = yt + 1

    # Calculate the interpolation weights for surrounding grid cells.
    wr = xo - xl
    wl = 1 - wr
    wb = yo - yt
    wt = 1 - wb

    # Get the field values at the surrounding grid cells.
    f_tl = field_prev[yt.ravel(), xl.ravel()].reshape(N, N)
    f_tr = field_prev[yt.ravel(), xr.ravel()].reshape(N, N)
    f_bl = field_prev[yb.ravel(), xl.ravel()].reshape(N, N)
    f_br = field_prev[yb.ravel(), xr.ravel()].reshape(N, N)

    # Update the field
    field[1:-1, 1:-1] = wl * (wb * f_bl + wt * f_tl) + wr * (wb * f_br + wt * f_tr)

    # Update the boundaries.
    set_boundary(boundary, field)


def project(u, v, u0, v0):
    N = u.shape[0] - 2
    h = 1 / N

    # Update v0 and reset u0.
    du = u[1:-1, 2:] - u[1:-1, 0:-2]
    dv = v[2:, 1:-1] - v[0:-2, 1:-1]
    v0[1:-1, 1:-1] = -0.5 * h * (du + dv)
    u0[1:-1, 1:-1] = 0

    # Update the boundaries.
    set_boundary(None, v0)
    set_boundary(None, u0)

    # Calculate u0 with Gauss-Seidel.
    for _ in range(20):
        # Get the field values at the neighboring grid cells.
        left = u0[1:-1, 0:-2]
        right = u0[1:-1, 2:]
        top = u0[0:-2, 1:-1]
        bottom = u0[2:, 1:-1]
        neighbors = left + right + top + bottom
        # Update the field.
        u0[1:-1, 1:-1] = (v0[1:-1, 1:-1] + neighbors) / 4
        # Update the boundary.
        set_boundary(None, u0)

    # Update the velocities.
    u[1:-1, 1:-1] -= 0.5 * (u0[1:-1, 2:] - u0[1:-1, 0:-2]) / h
    v[1:-1, 1:-1] -= 0.5 * (u0[2:, 1:-1] - u0[0:-2, 1:-1]) / h

    # Update the boundaries.
    set_boundary("u", u)
    set_boundary("v", v)


def dens_step(field, field_prev, u, v, diff, dt):
    add_source(field, field_prev, dt)

    # Go backward in time.
    field, field_prev = field_prev, field
    diffuse(None, field, field_prev, diff, dt)

    # Go forward in time.
    field, field_prev = field_prev, field
    advect(None, field, field_prev, u, v, dt)


def vel_step(u, v, u0, v0, visc, dt):
    add_source(u, u0, dt)
    add_source(v, v0, dt)

    # Go backward in time.
    u, u0 = u0, u
    diffuse("u", u, u0, visc, dt)
    v, v0 = v0, v
    diffuse("v", v, v0, visc, dt)
    project(u, v, u0, v0)

    # Go forward in time.
    u, u0 = u0, u
    v, v0 = v0, v
    advect("u", u, u0, u0, v0, dt)
    advect("v", v, v0, u0, v0, dt)
    project(u, v, u0, v0)


# Simulation setup.
N = 400
dt = 1
diff = 0.1
visc = 0.001

# Velocity arrays.
u_prev = np.zeros((N + 2, N + 2))
u = np.zeros((N + 2, N + 2))
v_prev = np.zeros((N + 2, N + 2))
v = np.zeros((N + 2, N + 2))

# Density arrays.
dens_prev = np.zeros((N + 2, N + 2))
dens = np.zeros((N + 2, N + 2))


def setup():
    py5.size(N, N)
    py5.pixel_density(1)


def draw():
    # Reset sources.
    dens_prev[:, :] = 0
    u_prev[:, :] = 0
    v_prev[:, :] = 0

    # Get the mouse coordinates.
    mx = np.clip(py5.mouse_x, 1, N).astype(int)
    my = np.clip(py5.mouse_y, 1, N).astype(int)

    # Add sources.
    u_prev[my - 1 : my + 1, mx - 1 : mx + 1] = np.random.uniform(-1, 1)
    v_prev[my - 1 : my + 1, mx - 1 : mx + 1] = -1
    dens_prev[my - 1 : my + 1, mx - 1 : mx + 1] = 100

    # Update the simulation.
    vel_step(u, v, u_prev, v_prev, visc, dt)
    dens_step(dens, dens_prev, u, v, diff, dt)

    # Normalize the variable to be drawn.
    D = normalize(dens[1:-1, 1:-1])

    # Scale the values for display.
    D *= 255

    # Display the RGB array.
    py5.load_np_pixels()
    py5.np_pixels[:, :, 1] = 0 # red channel
    py5.np_pixels[:, :, 2] = 0 # green channel
    py5.np_pixels[:, :, 3] = D # blue channel
    py5.update_np_pixels()


py5.run_sketch()

The paper says “Extending the solver to three dimensions should be straightforward to anyone who understands our code.” so I’ll try that out when I get a chance. Would also love to see somebody else’s implementation!

4 Likes

This is so cool @mcintyre, thank you for sharing it!

By the way, py5 does have remap() implemented using NumPy (it can take np.arrays!), modeled after Processing’s map() (remap() — py5 documentation)

I tried it and I also tried this to extract a bit more of oomph from my old computer but it didn’t help much…

def normalize(a):
    a_min = np.min(a)
    a_max = np.max(a)
    return (a - a_min) / (a_max - a_min) # with luck, no zeros dividing here...
1 Like

Thanks @villares! And good catch with remap(). Regarding oomph, hmm…

@hx2A any ideas for performance improvements? I hadn’t thought much about it while porting the algorithm from C. Here’s a quick profile using the standard library:

from contextlib import contextmanager
import time
import py5
import numpy as np


time_log = {
    "normalize": { "ns": 0, "pct": 0},
    "add_source": { "ns": 0, "pct": 0},
    "set_boundary": { "ns": 0, "pct": 0},
    "diffuse": { "ns": 0, "pct": 0},
    "advect": { "ns": 0, "pct": 0},
    "project": { "ns": 0, "pct": 0},
    "dens_step": { "ns": 0, "pct": 0},
    "vel_step": { "ns": 0, "pct": 0},
    "update_pixels": { "ns": 0, "pct": 0},
}


@contextmanager
def timer(func):
    try:
        start_time = time.time_ns()
        yield
    finally:
        stop_time = time.time_ns()
        run_time = stop_time - start_time
        time_log[func]["ns"] += run_time


@timer("normalize")
def normalize(a):
    a_min = np.min(a)
    a_max = np.max(a)
    return py5.remap(a, a_min, a_max, 0, 1)

# -- snip --

# Configure a timed run
start_time = time.time()
end_time = start_time + 2


def draw():
    # -- snip --

    # Display the RGB array.
    with timer("update_pixels"):
        py5.load_np_pixels()
        py5.np_pixels[:, :, 1] = 0 # red channel
        py5.np_pixels[:, :, 2] = 0 # green channel
        py5.np_pixels[:, :, 3] = D # blue channel
        py5.update_np_pixels()

    if time.time() > end_time:
        py5.exit_sketch()
        total_time = 0
        for func in time_log.keys():
            total_time += time_log[func]["ns"]
        for func in time_log.keys():
            pct = (time_log[func]["ns"] / total_time) * 100
            time_log[func]["pct"] = pct
            ns = time_log[func]["ns"]
            print(f"{func:13} | {ns:10} ns | {pct:05.2f} %")

# Printout from my M1 MacBook Pro
normalize     |   12196000 ns | 00.38 %
add_source    |    7672000 ns | 00.24 %
set_boundary  |    6512000 ns | 00.20 %
diffuse       |  644067000 ns | 20.08 %
advect        |  469741000 ns | 14.65 %
project       |  465516000 ns | 14.52 %
dens_step     |  366104000 ns | 11.42 %
vel_step      | 1221762000 ns | 38.10 %
update_pixels |   13439000 ns | 00.42 %
1 Like

I’m usually too clumsy to use this, but there are also some profiler helper functions built into py5… profile_draw() — py5 documentation

I should try launching a separate thread for the calculations and see what happens…

1 Like

Hi @mcintyre! I like this py5 project, it is a good use of numpy and py5.

Thank you @villares for suggesting profile_draw(). Internally that uses Python’s line profiler and can give you a line by line breakdown of where the Sketch is spending most of its time.

But, glancing at your code I don’t see any glaring problems because you are already using numpy with vectorized operations. There might be other ways to improve things though, but I’d like to see the line profiler output before brainstorming on that.

2 Likes

Thanks for the suggestion @villares! I didn’t realize py5 featured that particular goodie.

@hx2A I appreciate you taking a look. Here’s the output from profile_draw():

Timer unit: 1e-09 s

Total time: 1.21819 s
File: /Users/nick/code/fluid/sketch.py
Function: draw at line 205

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   205                                           def draw():
   206                                               # Reset sources.
   207        20     748000.0  37400.0      0.1      dens_prev[:, :] = 0
   208        20     587000.0  29350.0      0.0      u_prev[:, :] = 0
   209        20     564000.0  28200.0      0.0      v_prev[:, :] = 0
   210                                           
   211                                               # Get the mouse coordinates.
   212        20    1302000.0  65100.0      0.1      mx = np.clip(py5.mouse_x, 1, N).astype(int)
   213        20     576000.0  28800.0      0.0      my = np.clip(py5.mouse_y, 1, N).astype(int)
   214                                           
   215                                               # Add sources.
   216        20     305000.0  15250.0      0.0      u_prev[my - 1 : my + 1, mx - 1 : mx + 1] = np.random.uniform(-1, 1)
   217        20      24000.0   1200.0      0.0      v_prev[my - 1 : my + 1, mx - 1 : mx + 1] = -1
   218        20      19000.0    950.0      0.0      dens_prev[my - 1 : my + 1, mx - 1 : mx + 1] = 100
   219                                           
   220                                               # Update the simulation.
   221        20  920617000.0  4.6e+07     75.6      vel_step(u, v, u_prev, v_prev, visc, dt)
   222        20  272811000.0 1.36e+07     22.4      dens_step(dens, dens_prev, u, v, diff, dt)
   223                                           
   224                                               # Normalize the variable to be drawn.
   225        20    7868000.0 393400.0      0.6      D = normalize(dens[1:-1, 1:-1])
   226                                           
   227                                               # Scale the values for display.
   228        20     611000.0  30550.0      0.1      D *= 255
   229                                           
   230                                               # Display the RGB array.
   231        20    7168000.0 358400.0      0.6      py5.load_np_pixels()
   232        20    1338000.0  66900.0      0.1      py5.np_pixels[:, :, 1] = 0 # red channel
   233        20    1114000.0  55700.0      0.1      py5.np_pixels[:, :, 2] = 0 # green channel
   234        20    1217000.0  60850.0      0.1      py5.np_pixels[:, :, 3] = D # blue channel
   235        20    1282000.0  64100.0      0.1      py5.update_np_pixels()
   236                                           
   237        20      36000.0   1800.0      0.0      if time.time() > end_time:
   238                                                   py5.print_line_profiler_stats()
   239                                                   py5.exit_sketch()

It looks like vel_step() consumes the most time per hit and about 75% of the total execution time.

Had a hunch, so I checked within that function to see if I accidentally copied arrays instead of creating views:

def vel_step(u, v, u0, v0, visc, dt):
    # -- snip --

    # Check memory
    arrays = ("u", u), ("v", v), ("u0", u0), ("v0", v0)
    for name, array in arrays:
        if array.base is None:
            print(f"{name} is a copy")
        else:
            print(f"{name} is a view")

# Output:
u is a copy
v is a copy
u0 is a copy
v0 is a copy

Oops! All those copies probably aren’t helping performance. I was glad to get the code running, so I’ll pull on that thread a bit more to see if I can get it running well. Would love any other suggestions if they come to mind!

1 Like