SwitchNet Neural network examples

Neural networks using fast transforms as a core component.

You can view the fast transform as a synthetic weight matrix that is very fast to compute, combined with parametric activation functions as the adjustable part.
Or you can view the fast transform as a one-to-all connection algorithm that links multiple small-width layers together into a single complete wide layer, gaining both a speed advantage and a parameter-reduction advantage.
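
In code terms, one stage of the network below looks roughly like this (a sketch only; whtN() is the normalized fast Walsh-Hadamard transform defined further down, and slopes is just a stand-in name for the per-element parameter pairs):

// One SwitchNet stage: a 2-slope switch per element (the adjustable part)
// followed by the fast transform, which connects every element to every
// other element in O(n log n) time instead of the O(n^2) of a weight matrix.
void switchStage(float[] vec, float[] slopes) { // slopes holds 2 values per element
  for (int i = 0; i < vec.length; i++) {
    vec[i] *= (vec[i] < 0f) ? slopes[2 * i] : slopes[2 * i + 1];
  }
  whtN(vec);
}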

// Test with Lissajous curves
color c1;
float rate;
int epochs;
float[][] ex;
float[] work = new float[256];
SWNet net;

void setup() {
  size(400, 400);
  net = new SWNet(256, 4, 4, 0.1); //vecLen, widthBy, depth, learning rate
  rate=0.05f; //Learning rate (unused here; the net uses the rate passed to the constructor)
  epochs=20; //Epochs per frame
  c1 = color(0xff, 0xd7, 00);
  ex=new float[8][256];
  for (int i = 0; i < 127; i++) {
    // Training data
    float t = (i * 2 * PI) / 127;
    ex[0][2 * i] = sin(t);
    ex[0][2 * i + 1] = sin(2 * t);
    ex[1][2 * i] = sin(2 * t);
    ex[1][2 * i + 1] = sin(t);
    ex[2][2 * i] = sin(2 * t);
    ex[2][2 * i + 1] = sin(3 * t);
    ex[3][2 * i] = sin(3 * t);
    ex[3][2 * i + 1] = sin(2 * t);
    ex[4][2 * i] = sin(3 * t);
    ex[4][2 * i + 1] = sin(4 * t);
    ex[5][2 * i] = sin(4 * t);
    ex[5][2 * i + 1] = sin(3 * t);
    ex[6][2 * i] = sin(2 * t);
    ex[6][2 * i + 1] = sin(5 * t);
    ex[7][2 * i] = sin(5 * t);
    ex[7][2 * i + 1] = sin(2 * t);
  }
  textSize(16);
}

void draw() {
  background(0);
  //loadPixels();
  for (int k=0; k<epochs; k++) {
    for (int i = 0; i < ex.length; i++) {
      net.train(ex[i], ex[i]);
    }
  }
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 + int(18 * ex[i][j]), 44 + int(18 * ex[i][j + 1]), c1);
    }
  }
  for (int i = 0; i < 8; i++) {
    net.recall(work, ex[i]);
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 +int( 18 * work[j]), 104 + int(18 * work[j + 1]), c1);
    }
  }
  //updatePixels();
  text("Training Data", 5, 20);
  text("Autoassociative recall", 5, 80);
  text("Iterations: " + frameCount*epochs, 5, 150);
}

final class SWNet {
  final int vecLen;
  final int widthBy;
  final int depth;
  final float[] params;
  final float[] surface;
  final float[] work;
  final float[] flipsIn;
  final float[] flipsOut;
  float rate;
  SWNet(int vecLen, int widthBy, int depth, float rate) {
    this.vecLen = vecLen;
    this.widthBy=widthBy; // Increase the width to allow full information flow through each layer.
    this.depth = depth;
    this.rate=rate;
    params = new float[2*vecLen*widthBy*depth];
    surface=new float[vecLen*widthBy*depth];
    work=new float[vecLen*widthBy];
    flipsIn=new float[vecLen*widthBy];
    flipsOut=new float[vecLen];
    for (int i = 0; i < params.length; i++) {
      params[i] = randomGaussian();
    }
    int r = 0x9E3779B9;
    for (int i=0; i<vecLen*widthBy; i++) {
      r=r*1664525+1013904223; // Numerical Recipes LCG random number generator
      flipsIn[i] = (r & (1<<16)) == 0 ? -1f : 1f; // bit 16, higher bits are more random
    }
    for (int i=0; i<vecLen; i++) {
      r=r*1664525+1013904223;
      flipsOut[i] = (r & (1<<16)) == 0 ? -1f : 1f;
    }
  }

  void recall(float[] result, float[] input) {
    final int n=vecLen*widthBy;
    for (int i=0; i<n; i++) {
      work[i]=flipsIn[i]*input[i&(vecLen-1)]; //vecLen-1 acts as a binary mask
    }
    whtN(work); // Quasi-random projection of the input; the spectral response aspect of the transform is not needed here
    int paIdx = 0;
    for (int i = 0; i < depth; i++) {
      for (int j = 0; j < n; j ++) {
        float x= work[j];
        if (x < 0f) {
          work[j]=x*params[paIdx];
        } else {
          work[j]=x*params[paIdx+1];
        }
        paIdx +=2;
      }
      whtN(work);
    }
    for(int i=0;i<vecLen;i++){
      result[i]=work[i]*flipsOut[i];
    }  
  }

  void train(float[] target, float[] input) {
    final int n=vecLen*widthBy; // Forward pass
    for (int i=0; i<n; i++) {
      work[i]=flipsIn[i]*input[i&(vecLen-1)];
    }
    whtN(work);
    int paIdx = 0;
    for (int i = 0; i < depth; i++) {
      System.arraycopy(work, 0, surface, i*n, n); // store layer outputs 
      for (int j = 0; j < n; j ++) {
        float x= work[j];
        if (x < 0) {
          work[j]=x*params[paIdx];
        } else {
          work[j]=x*params[paIdx+1];
        }
        paIdx +=2;
      }
      whtN(work);
    }
    for(int i=0;i<vecLen;i++){
      work[i]=work[i]*flipsOut[i];
    }  
    for (int i=0; i<vecLen; i++) {
      work[i]=(target[i]-work[i])*rate; //error by learning rate
    }
    for(int i=0;i<vecLen;i++){
      work[i]=work[i]*flipsOut[i];
    } 
    for (int i=vecLen; i<n; i++) {    // clear upper
      work[i]=0f;
    }
    for (int i = depth-1; i >=0; i--) {
      whtN(work);  // invert error back
      paIdx=i*n*2;
      int surIdx=i*n;
      for (int j = 0; j < n; j ++,paIdx+=2) {
        float x=surface[surIdx+j];
        float e=work[j]; // error
        if (x < 0f) {    
          params[paIdx]+=x*e;
          work[j]*=params[paIdx];
        } else {
          params[paIdx+1]+=x*e;
          work[j]*=params[paIdx+1];
        }
      }
    }
  }
}


// Fast Walsh Hadamard Transform (normalized for scale.)
void whtN(float[] vec) {
  final int n = vec.length;
  int hs = 1;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is a good hint to HotSpot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
  float sc=1f/sqrt(n);  // scale to leave overall vector magnitude unchanged
  for (int i=0; i<n; i++) {
    vec[i]*=sc;
  }
}

Further experiments:
https://archive.org/details/switch-nets

This looks really interesting, thanks for sharing!

I’m trying to get a clearer picture of how this could be applied in practice. Could you share some concrete examples of where this technique might be useful, especially in ways that are easy to grasp for someone without a deep background in neural networks?

By the way, have you considered turning it into a Processing library? We have a library template and detailed instructions on creating a library.

Thanks for sharing.
It's fascinating that this converges with such a simple 'neuron'; an analog electronic version (5 resistors and 2 transistors per node, I think) would be so fast!
(The computer version is so efficient that I didn't realize it at first; I added frameRate(1); to see the learning progression.)

Edit: I found a patent on a 1972 implementation: US3775602A - Real time walsh-hadamard transformation of two-dimensional discrete pictures - Google Patents
And there look to be recent ASIC implementations too...
Thanks again.

I might put it in a library since the suggestion has been made.
I think next I will do a JavaScript (Processing p5.js) version.
I tidied up the code a bit:

color c1;
int epochs;
float[][] ex;
float[] work = new float[256];
SWNet net;

void setup() {
  size(400, 400);
  net = new SWNet(256, 4, 4, 0.1);//vecLen,widthBy,depth,learning rate
  epochs=25; //Epochs per frame
  c1 = color(255, 255, 255);
  ex=new float[8][256];
  for (int i = 0; i < 127; i++) {
    // Training data
    float t = (i * 2 * PI) / 127;
    ex[0][2 * i] = sin(t);
    ex[0][2 * i + 1] = sin(2 * t);
    ex[1][2 * i] = sin(2 * t);
    ex[1][2 * i + 1] = sin(t);
    ex[2][2 * i] = sin(2 * t);
    ex[2][2 * i + 1] = sin(3 * t);
    ex[3][2 * i] = sin(3 * t);
    ex[3][2 * i + 1] = sin(2 * t);
    ex[4][2 * i] = sin(3 * t);
    ex[4][2 * i + 1] = sin(4 * t);
    ex[5][2 * i] = sin(4 * t);
    ex[5][2 * i + 1] = sin(3 * t);
    ex[6][2 * i] = sin(2 * t);
    ex[6][2 * i + 1] = sin(5 * t);
    ex[7][2 * i] = sin(5 * t);
    ex[7][2 * i + 1] = sin(2 * t);
  }
  textSize(16);
  // frameRate(5);
}

void draw() {
  background(0);
  for (int k=0; k<epochs; k++) {
    for (int i = 0; i < ex.length; i++) {
      net.train(ex[i], ex[i]);
    }
  }
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 + int(18 * ex[i][j]), 44 + int(18 * ex[i][j + 1]), c1);
    }
  }
  for (int i = 0; i < 8; i++) {
    net.recall(work, ex[i]);
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 +int( 18 * work[j]), 104 + int(18 * work[j + 1]), c1);
    }
  }
  text("Training Data", 5, 20);
  text("Autoassociative recall", 5, 80);
  text("Iterations: " + frameCount*epochs, 5, 150);
}

final class SWNet {
  final int vecLen;
  final int widthBy;
  final int depth;
  final float[] params;
  final float[] surface;
  final float[] work;
  final float[] flipsIn;
  final float[] flipsOut;
  float rate;
  SWNet(int vecLen, int widthBy, int depth, float rate) {
    this.vecLen = vecLen;
    this.widthBy=widthBy;
    this.depth = depth;
    this.rate=rate;
    params = new float[2*vecLen*widthBy*depth];
    surface=new float[vecLen*widthBy*depth];
    work=new float[vecLen*widthBy];
    flipsIn=new float[vecLen*widthBy];
    flipsOut=new float[vecLen];
    for (int i = 0; i < params.length; i++) {
      params[i] = randomGaussian();
    }
    int r = 0x9E3779B9;
    for (int i=0; i<vecLen*widthBy; i++) {
      r=r*1664525+1013904223; // Numerical Recipes LCG random number generator
      flipsIn[i] = (r & (1<<16)) == 0 ? -1f : 1f; // bit 16, higher bits more random
    }
    for (int i=0; i<vecLen; i++) {
      r=r*1664525+1013904223;
      flipsOut[i] = (r & (1<<16)) == 0 ? -1f : 1f;
    }
  }

  void recall(float[] result, float[] input) {
    final int n=vecLen*widthBy;
    final boolean t=result==work;  // training mode: train() passes the net's own work buffer as result
    for (int i=0; i<n; i++) {
      work[i]=flipsIn[i]*input[i&(vecLen-1)]; //vecLen-1 acts as a binary mask
    }
    whtN(work);
    for (int i = 0, paIdx=0; i < depth; i++) {
      if(t) System.arraycopy(work, 0, surface, i*n, n);
      for (int j = 0; j < n; j ++, paIdx+=2) {
        if (work[j] < 0f) {
          work[j] *=params[paIdx];
        } else {
          work[j] *=params[paIdx+1];
        }
      }
      whtN(work);
    }
    if(t) return;
    for (int i=0; i<vecLen; i++) {
      result[i]=work[i]*flipsOut[i];
    }
  }

  void train(float[] target, float[] input) {
    final int n=vecLen*widthBy;
    recall(work,input);
    for (int i=0; i<vecLen; i++) {
      work[i]=(target[i]*flipsOut[i]-work[i])*rate;
    }
    for (int i=vecLen; i<n; i++) {    // clear upper
      work[i]=0f;
    }
    for (int i = depth-1; i >=0; i--) {
      whtN(work);
      for (int j = 0, surIdx=i*n, paIdx=2*i*n; j < n; j ++, paIdx+=2) {
        float x=surface[surIdx+j];
        if (x < 0f) {
          params[paIdx]+=x*work[j];
          work[j] *=params[paIdx];
        } else {
          params[paIdx+1]+=x*work[j];
          work[j] *=params[paIdx+1];
        }
      }
    }
  }
}

// Fast Walsh Hadamard Transform (normalized for scale.)
void whtN(float[] vec) {
  final int n = vec.length;
  int hs = 1;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is a good hint to HotSpot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
  float sc=1f/sqrt(n);  // scale to leave overall vector magnitude unchanged
  for (int i=0; i<n; i++) {
    vec[i]*=sc;
  }
}

I think I will create some other examples. I haven't really done much coding for a year or so, and I don't want to sit coding for 8 hours non-stop per day either. I think a little bit every day, while improving the quality of the code I create, is the way to go.
I have a little website with some background information:
https://sites.google.com/view/algorithmshortcuts/switchnet

I should say that vecLen, the length of the input, output and target arrays, should be a positive integer power of 2, like 64, 128, 256, ...
Likewise widthBy should be 1, 2, 4, 8, 16, ...
The (learning) rate can be around 0.1 for small networks, and substantially smaller for deep nets, like 0.001.
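
A hypothetical guard along those lines that you could add at the top of the SWNet constructor (Integer.bitCount(x) == 1 is true exactly when x is a power of 2):

// Hypothetical constructor checks for the constraints above.
if (vecLen < 2 || Integer.bitCount(vecLen) != 1) {
  throw new IllegalArgumentException("vecLen must be a power of 2: 64, 128, 256, ...");
}
if (widthBy < 1 || Integer.bitCount(widthBy) != 1) {
  throw new IllegalArgumentException("widthBy must be a power of 2: 1, 2, 4, 8, ...");
}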

One application would be as very wide input layers for a neural network dealing with, say, super-high-resolution images.
Or where you need very fast neural networks for real-time control of some machine or whatever.
I'm not really sure how to start explaining it at a very basic level.
I think you start with the idea that the ReLU neural network activation function is a switch:
https://sites.google.com/view/algorithmshortcuts/relu-as-a-switch
And then explain how weighted sums can be connected up as weighted-sum switching networks, and then go through the (simple) linear algebra of that.
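
A tiny illustration of that viewpoint (the two-slope version matches the params[paIdx] / params[paIdx+1] pairs in the SWNet code above; the function and parameter names here are just for illustration):

// ReLU as a switch: the sign of x selects between "connect" (slope 1)
// and "disconnect" (slope 0).
float relu(float x) {
  return (x < 0f) ? 0f : x;
}
// The SwitchNet activation keeps the same switching decision but makes
// both branches adjustable slopes, as in the layer loop of SWNet above.
float switchAct(float x, float slopeNeg, float slopePos) {
  return (x < 0f) ? slopeNeg * x : slopePos * x;
}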

Maybe it is simpler to say that weighting the inputs to a fast transform allows that transform to approximate a full neural network weight matrix quite well. If the internal width of the network is, say, 256, then there are 2 to the power of 256 different switching decisions. That is an absolutely astronomical number of possible weight-matrix approximations, and surely one of them will approximately do for whatever task you are training the network on.
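
A small sketch of that claim, reusing whtN() from the code above: feeding unit basis vectors through (diagonal weights, then the transform) recovers the equivalent matrix, and that matrix is dense even though only n adjustable weights were used.

// Build the effective matrix of: scale by n weights, then apply whtN().
int n = 8;
float[] w = new float[n];
for (int k = 0; k < n; k++) w[k] = randomGaussian();
float[][] effective = new float[n][n];
for (int j = 0; j < n; j++) {
  float[] e = new float[n];
  e[j] = w[j];  // diag(w) applied to the j-th unit basis vector
  whtN(e);      // mix with the fast transform
  for (int k = 0; k < n; k++) effective[k][j] = e[k]; // column j of the equivalent matrix
}
// In general every entry of 'effective' is nonzero: a full n x n weight
// matrix has been synthesized from n parameters plus a fast transform.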

I did a Processing p5.js version with 256 training examples.
It is trained to recall a sine wave given a cosine wave.
https://editor.p5js.org/seanhaddps/sketches/TlfJQFFxU
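
The training call for that kind of hetero-associative pair looks like this with the SWNet class from earlier in the thread (the p5.js sketch differs in detail; the variable names here are illustrative):

// One cosine-input / sine-target training pair for a vecLen = 256 network.
float[] inputWave = new float[256];
float[] targetWave = new float[256];
for (int i = 0; i < 256; i++) {
  float t = (i * 2 * PI) / 256;
  inputWave[i] = cos(t);
  targetWave[i] = sin(t);
}
net.train(targetWave, inputWave); // learn to recall the sine given the cosine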

Also, I tried to explain how fast transforms can turn simple weighting of a vector into rotations.
https://sites.google.com/view/algorithmshortcuts/uniform-point-picking-on-a-sphere
That is only a mathematical outline that some computer scientist could fill in (or disprove!). Up to them.
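
One small sanity check behind that rotation view: the normalized transform in whtN() is orthogonal, so it leaves vector length unchanged.

// Check that whtN() preserves the length (norm) of a random vector.
float[] v = new float[256];
for (int i = 0; i < v.length; i++) v[i] = randomGaussian();
float before = 0f, after = 0f;
for (int i = 0; i < v.length; i++) before += v[i] * v[i];
whtN(v);
for (int i = 0; i < v.length; i++) after += v[i] * v[i];
println("norm before: " + sqrt(before) + "  norm after: " + sqrt(after)); // should be equal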

I have done a dense-layer version of the SwitchNet neural network.
This lets you exceed the n-squared parameter limit of a conventional neural network layer, or alternatively make the layers sparser than a conventional layer.
Either way you get far more switching per parameter than in a conventional, say ReLU-based, neural network.
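
For scale, here is a rough parameter-count comparison using the SWNet from earlier in this thread (the dense-layer variant on the archive page below is organized differently):

// Adjustable parameters in the SWNet used above versus one dense layer.
int vecLen = 256, widthBy = 4, depth = 4;
int switchNetParams = 2 * vecLen * widthBy * depth; // 8192 slope parameters
int denseLayerParams = vecLen * vecLen;             // 65536 weights in one 256x256 matrix
println("SwitchNet: " + switchNetParams + "  dense layer: " + denseLayerParams);
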
Switch Net DL : Sean O'Connor : Free Download, Borrow, and Streaming : Internet Archive
I definitely struggled writing the code, partly from not having coded anything much for a long time. I would call the code alpha stage.
It is unpaid work, so things tend to proceed very slowly. Like years of delay, literally.
There could be entities out there that are not so delayed.

I wrote a p5.js online version.
https://sites.google.com/view/algorithmshortcuts/switchnet-dense-layer
