Strategy for drawing a million objects/rectangles?

hi all, i’m returning to Processing after a few years absence, using 4.2 IDE. i need to draw a million rectangles in 3d space, each with a new position every frame (think floating pixels) but frameRate on my mac is slowing to a crawl (< 2 fps) above about 20000.

i’m using P3D, and i’ve tried using both rect and PShape, and surprisingly (to me) rect was faster, though it still borks. PShape.GROUP doesn’t apply because i need to change the position of each object every frame.

i’ve checked the latest in Examples > Demos > Performance but even those examples default to only 50000 (beyond the limit on my machine).

in previous Processing versions i would have used gl() and pgl() magic with some hint() to get a big speed bump on dealing with large quantities of objects but i gather GL functions have been deprecated because P3D should abstract it.

any suggestions for strategies to draw a million rectangles in 3D faster? fyi i’m still using an intel macbook, would using M1/M2 get me significantly closer to my target?

Hello,

A related discussion:

May be something of interest in there…

:)

You can definitely do this using OpenGL calls if your Mac supports it. Whether they are deprecated or not, the OpenGL calls still work great from Processing, at least until some future MacOS update removes OpenGL entirely.

On my 7+ year old Linux machine with a mid-range nVidia card from that time, I can animate and render at least 2 million camera aligned triangles with the fragment shader rendering a sphere on each and stay above 60 fps.

I’m not restricting myself to pgl, however, but using GL4 gl = pgl.gl.getGL4(); to access more modern calls. I’m rendering spheres on either single triangles or on pairs in a square rendered with glDrawArraysInstanced() and pass in a single position per sphere using glVertexAttribDivisor(). The vertex shader uses that position data to compute the vertex positions for the camera aligned triangles.

Edit: Hmm, I think I was an order off on my number. A million sphere-shaded triangles renders at 25 fps for me. But a million triangles on the screen is awfully hard to see as anything larger than just points. I can render a million points at 60 fps. Is there any reason you think you want rectangles instead of just points?

Give this code a try to see if it even runs on MacOS. And then to see how fast it is.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;

import com.jogamp.opengl.*;


int N = 1000000;

PVector[] pos;
PVector[] vel;

/**
 * Opens a 1200x1200 P3D window, gives every point a random start position
 * inside the cube [-1,1]^3 and a random direction of travel (speed 0.002 per
 * frame), then prepares the OpenGL resources used by drawPoints().
 */
void setup() {
  size( 1200, 1200, P3D );

  pos = new PVector[ N ];
  vel = new PVector[ N ];

  int i = 0;
  while ( i < N ) {
    // Draw the three coordinates in x, y, z order.
    float startX = random( -1, 1 );
    float startY = random( -1, 1 );
    float startZ = random( -1, 1 );
    pos[i] = new PVector( startX, startY, startZ );

    // Unit-length random direction scaled down to a small per-frame step.
    PVector direction = PVector.random3D();
    vel[i] = direction.mult( 0.002 );

    i++;
  }

  initOglBuffers();
}

/**
 * Per-frame update: advances every point by its velocity, reflects it off
 * the walls of the cube [-1,1]^3, renders all points through drawPoints()
 * and overlays the current frame rate.
 */
void draw() {
  background( 0 );
  stroke( 255 );

  for ( int i = 0; i < N; i++ ) {
    PVector p = pos[i];
    PVector v = vel[i];

    p.add( v );

    // Mirror the overshoot back inside the cube and flip the velocity
    // component for whichever wall was crossed.
    if ( p.x < -1 ) {
      p.x = -2-p.x;
      v.x = -v.x;
    }
    if ( p.x > 1 ) {
      p.x = 2-p.x;
      v.x = -v.x;
    }
    if ( p.y < -1 ) {
      p.y = -2-p.y;
      v.y = -v.y;
    }
    if ( p.y > 1 ) {
      p.y = 2-p.y;
      v.y = -v.y;
    }
    if ( p.z < -1 ) {
      p.z = -2-p.z;
      v.z = -v.z;
    }
    if ( p.z > 1 ) {
      p.z = 2-p.z;
      v.z = -v.z;
    }
  }

  drawPoints();
  text( frameRate, 4, 16 );
}


////

// Low-level GL handles; both are re-fetched on every beginPGL().
PJOGL pgl;
GL4 gl;

// Shader program built from vertSrc / fragSrc.
PShader shdr;

// CPU-side staging buffer holding x,y,z for each point, and the id of the
// GL buffer object it is uploaded into.
FloatBuffer posBuffer;
int posVboId;

void initOglBuffers() {
  pgl = (PJOGL) beginPGL();
  gl = pgl.gl.getGL4();
  
  shdr = new PShader( g.parent, vertSrc, fragSrc );
  shdr.set( "N", float(N) );

  posBuffer = allocateDirectFloatBuffer( 3*N );

  // Get GL ids for all the buffers
  IntBuffer intBuffer = IntBuffer.allocate(1);  
  gl.glGenBuffers(1, intBuffer);
  posVboId = intBuffer.get(0);
  
  endPGL();
}

/**
 * Copies the current positions into the staging buffer, uploads them to the
 * position VBO and renders all N points with a single glDrawArrays call
 * using the custom shader.
 */
void drawPoints() {
  shdr.set( "time", frameCount / 60.0 );

  // Refill the staging buffer from the start, three floats per point.
  posBuffer.rewind();
  for ( PVector p : pos ) {
    posBuffer.put( p.x ).put( p.y ).put( p.z );
  }
  posBuffer.rewind();

  pgl = (PJOGL) beginPGL();
  gl = pgl.gl.getGL4();

  final int floatsPerPoint = 3;
  final int strideBytes = floatsPerPoint * Float.BYTES;

  shdr.bind();

  // Attribute 0 is the position input of the vertex shader.
  gl.glEnableVertexAttribArray( 0 );

  // Upload this frame's positions and describe their layout:
  // tightly packed triples of floats starting at offset 0.
  gl.glBindBuffer( GL.GL_ARRAY_BUFFER, posVboId );
  gl.glBufferData( GL.GL_ARRAY_BUFFER, Float.BYTES * 3*N, posBuffer, GL.GL_DYNAMIC_DRAW );
  gl.glVertexAttribPointer( 0, floatsPerPoint, GL.GL_FLOAT, false, strideBytes, 0 );

  // Let the vertex shader choose the point size, then draw everything.
  gl.glEnable( GL3.GL_PROGRAM_POINT_SIZE );
  gl.glDrawArrays( PGL.POINTS, 0, N );

  // Undo the bindings made above before handing control back.
  gl.glBindBuffer( GL.GL_ARRAY_BUFFER, 0 );
  gl.glDisableVertexAttribArray( 0 );
  shdr.unbind();
  endPGL();
}

/**
 * Creates a direct, native-byte-order buffer with room for n floats.
 *
 * @param n number of float elements
 * @return a FloatBuffer view over the newly allocated direct memory
 */
FloatBuffer allocateDirectFloatBuffer(int n) {
  ByteBuffer raw = ByteBuffer.allocateDirect(n * Float.BYTES);
  raw.order(ByteOrder.nativeOrder());
  return raw.asFloatBuffer();
}

/**
 * Creates a direct, native-byte-order buffer with room for n ints.
 *
 * @param n number of int elements
 * @return an IntBuffer view over the newly allocated direct memory
 */
IntBuffer allocateDirectIntBuffer(int n) {
  ByteBuffer raw = ByteBuffer.allocateDirect(n * Integer.BYTES);
  raw.order(ByteOrder.nativeOrder());
  return raw.asIntBuffer();
}

// GLSL 3.30 vertex shader source for the point cloud.
// For each vertex it takes the position from attribute location 0, halves
// it, rotates it in the xz plane by an angle that grows with the "time"
// uniform and in the yz plane by a fixed angle, then scales xy by
// 2/(1.5+z) and sets the point size to 4/(1.5+z), so both shrink as z grows.
// The colour is a hue taken from gl_VertexID / N at saturation 0.7 and
// brightness 1.0.
// The resolution, modelview and projection uniforms and the sn()/cs()
// helpers are declared but not referenced by main().
String[] vertSrc = { """
#version 330 core
precision highp float;
precision highp int;

uniform vec2 resolution;
uniform mat4 modelview;
uniform mat4 projection;
uniform float N;
uniform float time;

layout (location = 0) in vec3 aPos0;
out vec4 vColor;

#define TAU 6.283185307179586

float sn( float t ) { return sin( TAU * t ); }
float cs( float t ) { return cos( TAU * t ); }

vec2 rot( in vec2 p, float a ) {
   return cos(a)*p + sin(a)*vec2(-p.y, p.x);
}

vec3 hsb2rgb( in vec3 c ) {
   vec3 rgb = clamp(abs(mod(c.x*6.0+vec3(0.0,4.0,2.0),
                            6.0)-3.0)-1.0, 
                    0.0, 
                    1.0 );
   rgb = rgb*rgb*(3.0-2.0*rgb);
   return c.z * mix( vec3(1.0), rgb, c.y);
}

void main() {
  float u = float(gl_VertexID)/N;
  vec3 p = aPos0 * 0.5;
  p.xz = rot( p.xz, TAU*0.02*time );
  p.yz = rot( p.yz, -TAU*0.1 );
  p.xy *= 2./(1.5+p.z);
  gl_Position = vec4( p, 1. );
  gl_PointSize = 4./(1.5+p.z);
  vColor = vec4( hsb2rgb( vec3( u, 0.7, 1.) ), 1. );
//  vColor = vec4( (aPos0+vec3(1.))*0.5, 1. );
}
""" };


// GLSL 3.30 fragment shader source: writes the colour received from the
// vertex shader (vColor) to the output unchanged.
String[] fragSrc = { """
#version 330 core
in vec4 vColor;
out vec4 outColor;
void main() {
  outColor = vColor;
}
""" };
3 Likes

thanks for that, i had tried searching but hadn’t come across that thread, much appreciated. yes lots of useful info here, tho i can’t tell if the translate bug mentioned got fixed.

@scudly such an elegant solution, thanks, works great! i’m only getting 10fps with 1M but that’s much much closer to what i’m after and i am, after all, still on a very old mac. much appreciated!

[edit: I had a question about how to insert color in drawPoints so that pixel colors could be determined arbitrarily but have now figured it out, solution pasted below]

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;

import com.jogamp.opengl.*;

int N = 500000;

PVector[] pos;
PVector[] vel;
PVector[] col;

/**
 * Opens a 1200x1200 P3D window and initialises every point with a random
 * position inside the cube [-1,1]^3, a random direction of travel (speed
 * 0.002 per frame) and a random hue/saturation/brightness colour, then
 * prepares the OpenGL resources used by drawPoints().
 */
void setup() {
  size( 1200, 1200, P3D );

  pos = new PVector[ N ];
  vel = new PVector[ N ];
  col = new PVector[ N ];

  int i = 0;
  while ( i < N ) {
    // Draw the three coordinates in x, y, z order.
    float startX = random( -1, 1 );
    float startY = random( -1, 1 );
    float startZ = random( -1, 1 );
    pos[i] = new PVector( startX, startY, startZ );

    // Unit-length random direction scaled down to a small per-frame step.
    PVector direction = PVector.random3D();
    vel[i] = direction.mult( 0.002 );

    // Brightness is offset by 0.5 so it stays in the upper half of the range.
    float hue = random( 1 );
    float saturation = random( 1 );
    float brightness = 0.5 + random( 0.5 );
    col[i] = new PVector( hue, saturation, brightness );

    i++;
  }

  initOglBuffers();
}

/**
 * Per-frame update: moves every point by its velocity, bounces it off the
 * walls of the cube [-1,1]^3, draws all points via drawPoints() and prints
 * the current frame rate in the corner.
 */
void draw() {
  background( 0 );
  stroke( 255 );

  for ( int i = 0; i < N; i++ ) {
    PVector p = pos[i];
    PVector v = vel[i];
    p.add( v );

    // Each wall test mirrors the overshoot back inside and reverses the
    // matching velocity component.
    if ( p.x < -1 ) { p.x = -2-p.x;  v.x = -v.x; }
    if ( p.x >  1 ) { p.x =  2-p.x;  v.x = -v.x; }
    if ( p.y < -1 ) { p.y = -2-p.y;  v.y = -v.y; }
    if ( p.y >  1 ) { p.y =  2-p.y;  v.y = -v.y; }
    if ( p.z < -1 ) { p.z = -2-p.z;  v.z = -v.z; }
    if ( p.z >  1 ) { p.z =  2-p.z;  v.z = -v.z; }
  }

  drawPoints();
  text( frameRate, 4, 16 );
}


////

FloatBuffer posBuffer;
FloatBuffer colBuffer;
int posVboId;
int colorVboId;

PJOGL pgl;
GL4 gl;

PShader shdr;

void initOglBuffers() {
  pgl = (PJOGL) beginPGL();
  gl = pgl.gl.getGL4();

  shdr = new PShader( g.parent, vertSrc, fragSrc );
  shdr.set( "N", float(N) );

  posBuffer = allocateDirectFloatBuffer( 3*N );
  colBuffer = allocateDirectFloatBuffer( 3*N );

  // Get GL ids for all the buffers
  IntBuffer intBuffer = IntBuffer.allocate(2);
  gl.glGenBuffers(2, intBuffer);
  posVboId = intBuffer.get(0);
  colorVboId = intBuffer.get(1);

  endPGL();
}

/**
 * Copies the current per-point positions and colours into their staging
 * buffers, uploads both to the GPU and renders all N points with a single
 * glDrawArrays call using the custom shader.
 *
 * Vertex attribute array 1 (colour) is disabled after drawing, the same way
 * attribute 0 is. Previously it was enabled here on every frame but never
 * disabled, so an enabled attribute array was left behind in the GL state
 * when control returned to Processing's own renderer.
 */
void drawPoints() {
  shdr.set( "time", frameCount / 60.0 );

  // Refill both staging buffers from the start, three floats per point.
  posBuffer.rewind();
  colBuffer.rewind();
  for ( int i=0; i<N; i++ ) {
    posBuffer.put( pos[i].x );
    posBuffer.put( pos[i].y );
    posBuffer.put( pos[i].z );
    colBuffer.put( col[i].x );
    colBuffer.put( col[i].y );
    colBuffer.put( col[i].z );
  }
  // Reset the read position so glBufferData copies from element 0.
  posBuffer.rewind();
  colBuffer.rewind();

  pgl = (PJOGL) beginPGL();
  gl = pgl.gl.getGL4();

  shdr.bind();

  uploadPointAttribute( 0, posVboId, posBuffer );    // location 0: aPos0
  uploadPointAttribute( 1, colorVboId, colBuffer );  // location 1: thisColor

  // Let the vertex shader choose the point size, then draw everything.
  gl.glEnable( GL3.GL_PROGRAM_POINT_SIZE );
  gl.glDrawArrays( PGL.POINTS, 0, N );

  // Undo every binding made above before handing control back.
  gl.glBindBuffer( GL.GL_ARRAY_BUFFER, 0 );
  gl.glDisableVertexAttribArray( 0 );
  gl.glDisableVertexAttribArray( 1 );
  shdr.unbind();
  endPGL();
}

/**
 * Enables one vertex attribute, uploads 3*N floats from the given staging
 * buffer into the given buffer object, and declares the data as tightly
 * packed triples of floats starting at offset 0.
 * Must be called between beginPGL() and endPGL(), while gl is valid.
 *
 * @param location vertex attribute location used by the shader
 * @param vboId    GL buffer object that receives the data
 * @param data     staging buffer, positioned at its first element
 */
void uploadPointAttribute( int location, int vboId, FloatBuffer data ) {
  gl.glEnableVertexAttribArray( location );
  gl.glBindBuffer( GL.GL_ARRAY_BUFFER, vboId );
  // glBufferData( target, size, data, usage )
  gl.glBufferData( GL.GL_ARRAY_BUFFER, Float.BYTES * 3*N, data, GL.GL_DYNAMIC_DRAW );
  // glVertexAttribPointer( index, size, type, normalized, stride, pointer )
  gl.glVertexAttribPointer( location, 3, GL.GL_FLOAT, false, 3*Float.BYTES, 0 );
}

/**
 * Allocates direct memory for n floats in the platform's native byte order.
 *
 * @param n number of float elements
 * @return a FloatBuffer view over the newly allocated direct memory
 */
FloatBuffer allocateDirectFloatBuffer(int n) {
  final int byteCount = n * Float.BYTES;
  ByteBuffer backing = ByteBuffer.allocateDirect(byteCount);
  backing.order(ByteOrder.nativeOrder());
  return backing.asFloatBuffer();
}

/**
 * Allocates direct memory for n ints in the platform's native byte order.
 *
 * @param n number of int elements
 * @return an IntBuffer view over the newly allocated direct memory
 */
IntBuffer allocateDirectIntBuffer(int n) {
  final int byteCount = n * Integer.BYTES;
  ByteBuffer backing = ByteBuffer.allocateDirect(byteCount);
  backing.order(ByteOrder.nativeOrder());
  return backing.asIntBuffer();
}

// GLSL 3.30 vertex shader source for the coloured point cloud.
// For each vertex it takes the position from attribute location 0 and an
// (h, s, b) colour from attribute location 1. The position is halved,
// rotated in the xz plane by an angle that grows with the "time" uniform
// and in the yz plane by a fixed angle, then xy is scaled by 2/(1.5+z) and
// the point size is set to 4/(1.5+z), so both shrink as z grows.
// The colour is converted from HSB to RGB and passed on as vColor.
// The N, resolution, modelview and projection uniforms and the sn()/cs()
// helpers are declared but not referenced by main().
String[] vertSrc = { """
#version 330 core
precision highp float;
precision highp int;

uniform vec2 resolution;
uniform mat4 modelview;
uniform mat4 projection;
uniform float N;
uniform float time;

layout (location = 0) in vec3 aPos0;
layout (location = 1) in vec3 thisColor;
out vec4 vColor;

#define TAU 6.283185307179586

  float sn( float t ) {
  return sin( TAU * t );
}
float cs( float t ) {
  return cos( TAU * t );
}

vec2 rot( in vec2 p, float a ) {
  return cos(a)*p + sin(a)*vec2(-p.y, p.x);
}

vec3 hsb2rgb( in vec3 c ) {
  vec3 rgb = clamp(abs(mod(c.x*6.0+vec3(0.0, 4.0, 2.0),
    6.0)-3.0)-1.0,
    0.0,
    1.0 );
  rgb = rgb*rgb*(3.0-2.0*rgb);
  return c.z * mix( vec3(1.0), rgb, c.y);
}

void main() {
  vec3 p = aPos0 * 0.5;
  p.xz = rot( p.xz, TAU*0.02*time );
  p.yz = rot( p.yz, -TAU*0.1 );
  p.xy *= 2./(1.5+p.z);
  gl_Position = vec4( p, 1. );
  gl_PointSize = 4./(1.5+p.z);
  vColor = vec4( hsb2rgb( thisColor ), 1.); 
}
""" };


// GLSL 3.30 fragment shader source: writes the colour received from the
// vertex shader (vColor) to the output unchanged.
String[] fragSrc = { """
#version 330 core
in vec4 vColor;
out vec4 outColor;
void main() {
  outColor = vColor;
}
""" };
1 Like