Switch Net 4 - Neural Network

seanc4s · November 1, 2021, 10:37am

Multiple small width 4 neural network layers knitted together with the fast Walsh Hadamard transform acting as a connectionist device.

final class SWNet4 {
  final int vecLen;
  final int depth;
  final float scale;
  final float[] params;
  final float[] flips;
  SWNet4(int vecLen, int depth) {
    this.vecLen = vecLen;
    this.depth = depth;
    scale = 1f / sqrt(vecLen);
    params = new float[8 * vecLen * depth];
    flips = new float[vecLen];
    for (int i = 0; i < vecLen; i++) {
      flips[i] = sin(i * 1.895) < 0 ? -scale : scale;
    }
    for (int i = 0, j=0; i < params.length; i += 8) {
      params[i+j] = scale;
      params[i+j + 4] = scale;
      j=(j+1)&3;
    }
  }
  void recall(float[] result, float[] input) {
    for (int i = 0; i < vecLen; i++) {
      result[i] = input[i] * flips[i];
    }
    int paIdx = 0;
    for (int i = 0; i < depth; i++) {
      wht(result);
      for (int j = 0; j < vecLen; j += 4) {
        float x, a, b, c, d;
        x= result[j];
        if (x < 0) {
          a = x * this.params[paIdx];
          b = x * this.params[paIdx + 1];
          c = x * this.params[paIdx + 2];
          d = x * this.params[paIdx + 3];
        } else {
          a = x * this.params[paIdx + 4];
          b = x * this.params[paIdx + 5];
          c = x * this.params[paIdx + 6];
          d = x * this.params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 1];
        if (x < 0) {
          a += x * this.params[paIdx];
          b += x * this.params[paIdx + 1];
          c += x * this.params[paIdx + 2];
          d += x * this.params[paIdx + 3];
        } else {
          a += x * this.params[paIdx + 4];
          b += x * this.params[paIdx + 5];
          c += x * this.params[paIdx + 6];
          d += x * this.params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 2];
        if (x < 0) {
          a += x * this.params[paIdx];
          b += x * this.params[paIdx + 1];
          c += x * this.params[paIdx + 2];
          d += x * this.params[paIdx + 3];
        } else {
          a += x * this.params[paIdx + 4];
          b += x * this.params[paIdx + 5];
          c += x * this.params[paIdx + 6];
          d += x * this.params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 3];
        if (x < 0) {
          a += x * this.params[paIdx];
          b += x * this.params[paIdx + 1];
          c += x * this.params[paIdx + 2];
          d += x * this.params[paIdx + 3];
        } else {
          a += x * this.params[paIdx + 4];
          b += x * this.params[paIdx + 5];
          c += x * this.params[paIdx + 6];
          d += x * this.params[paIdx + 7];
        }
        paIdx += 8;
        result[j] = a;
        result[j + 1] = b;
        result[j + 2] = c;
        result[j + 3] = d;
      }
    }
    wht(result);
  }
}
// Fast Walsh Hadamard Transform
void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

// Sum of squared difference cost
float costL2(float[] vec, float[] tar) {
  float cost = 0;
  for (int i = 0; i < vec.length; i++) {
    float e = vec[i] - tar[i];
    cost += e * e;
  }
  return cost;
}

class Mutator {
  float[] previous;
  int[] pIdx;
  float limit;
  Rnd256 rng;

  Mutator(int size, float limit) {
    previous = new float[size];
    pIdx = new int[size];
    this.limit = limit;
    rng=new Rnd256();
  }
  void mutate(float[] vec) {
    for (int i = 0; i < previous.length; i++) {
      int rpos = rng.nextIntEx(vec.length);
      float v = vec[rpos];
      pIdx[i] = rpos;
      previous[i] = v;
      float m = limit * rng.mutate();
      float vm = v + m;
      if (vm >= this.limit) vm = v;
      if (vm <= -this.limit) vm = v;
      vec[rpos] = vm;
    }
  }
  void undo(float[] vec) {
    for (int i = previous.length - 1; i >= 0; i--) {
      vec[pIdx[i]] = previous[i];
    }
  }
}

final class Rnd256 {
  final long PHI = 0x9E3779B97F4A7C15L;
  private long s0, s1, s2, s3;
  Rnd256() {
    this(System.nanoTime());
  }
  Rnd256(long seed) {
    s0=staffordMix13(seed+PHI);
    s1=staffordMix13(seed+2*PHI);
    s2=staffordMix13(seed+3*PHI);
    s3=staffordMix13(seed+4*PHI);
  }
  long nextLong() {
    long result = s1;
    result = Long.rotateLeft(result + (result << 2), 7);
    result += result << 3;
    final long t = s1 << 17;
    s2 ^= s0;
    s3 ^= s1;
    s1 ^= s2;
    s0 ^= s3;
    s2 ^= t;
    s3 = Long.rotateLeft(s3, 45);
    return result;
  }
  float nextFloat() {
    return (nextLong() >>> 40) * 0x1.0p-24f;
  }
  boolean nextBoolean() {
    return nextLong() < 0;
  }
  int nextIntEx(int ex) {
    long r=(nextLong()>>>32)*ex;
    return (int)(r>>>32);
  }
  int nextIntInc(int inc) {
    long r=(nextLong()>>>32)*(inc+1L);
    return (int)(r>>>32);
  }
  float mutate() {
    int r=(int)nextLong();
    r &=0xbfff_ffff;
    r |=0x3000_0000;
    return Float.intBitsToFloat(r);
  }
  long staffordMix13(long z) {
    z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L;
    z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL;
    return z ^ (z >>> 31);
  }
}

// Test with Lissajous curves
color c1;
float[][] ex;
float[] work = new float[256];
float parentCost = Float.POSITIVE_INFINITY;
SWNet4 parentNet;
Mutator mut;

void setup() {
  size(400, 400);
  parentNet = new SWNet4(256, 4);
  mut = new Mutator(15, parentNet.scale);
  c1 = color(0xff,0xd7,00);
  ex=new float[8][256];
  for (int i = 0; i < 127; i++) {
    // Training data
    float t = (i * 2 * PI) / 127;
    ex[0][2 * i] = sin(t);
    ex[0][2 * i + 1] = sin(2 * t);
    ex[1][2 * i] = sin(2 * t);
    ex[1][2 * i + 1] = sin(t);
    ex[2][2 * i] = sin(2 * t);
    ex[2][2 * i + 1] = sin(3 * t);
    ex[3][2 * i] = sin(3 * t);
    ex[3][2 * i + 1] = sin(2 * t);
    ex[4][2 * i] = sin(3 * t);
    ex[4][2 * i + 1] = sin(4 * t);
    ex[5][2 * i] = sin(4 * t);
    ex[5][2 * i + 1] = sin(3 * t);
    ex[6][2 * i] = sin(2 * t);
    ex[6][2 * i + 1] = sin(5 * t);
    ex[7][2 * i] = sin(5 * t);
    ex[7][2 * i + 1] = sin(2 * t);
  }
  textSize(16);
}

void draw() {
  background(0);
  loadPixels();
  for (int i = 0; i < 100; i++) {
    mut.mutate(parentNet.params);
    float cost = 0;
    for (int j = 0; j < 8; j++) {
      parentNet.recall(work, ex[j]);
      cost += costL2(work, ex[j]); // autoassociate
    }
    if (cost < parentCost) {
      parentCost = cost;
    } else {
      mut.undo(parentNet.params);
    }
  }
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 + int(18 * ex[i][j]), 44 + int(18 * ex[i][j + 1]),c1);
    }
  }
  for (int i = 0; i < 8; i++) {
    parentNet.recall(work, ex[i]);
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 +int( 18 * work[j]), 104 + int(18 * work[j + 1]),c1);
    }
  }
  //updatePixels();
  text("Training Data", 5, 20);
  text("Autoassociative recall", 5, 80);
  text("Iterations: " + frameCount, 5, 150);
  text("Cost: " + parentCost, 5, 170);
}

seanc4s · November 1, 2021, 9:33pm

There are some ‘this’ statements that I should have removed that were in the JavaScript version I did before. Like ‘this.params’ should be simplified to just params. It is still correct anyway. Don’t remove the this statements from the constructor though!

seanc4s · November 1, 2021, 9:51pm

Basically you are knitting together multiple tiny width 4 neural network layers using the fast Walsh Hadamard transform as a connectionist device, to make a deep neural network.
The mini-layer width doesn’t have to be 4, it could be 2, 8, 1, 16 or whatever.
I used Beyond ReLU for the neural layers, but you could use any non-linear neural layer type or even just some parametric function type.
In sum total the tiny layers use far less parameters and are far faster than a conventional layer would.
Beyond ReLU

seanc4s · November 2, 2021, 5:10am

The WHT is a connectionist device because it connects any single input to all the outputs coequally. It should be used more with sparse neural networks to provide that functionality. It can turn sparse things into fully connected things at a low computational cost.

jeremydouglass · November 2, 2021, 8:37pm

Thanks for sharing this. What is: 0x1.0p ?

seanc4s · November 3, 2021, 12:17am

There is an answer on stack overflow:
https://stackoverflow.com/questions/8603232/p-in-constant-declaration
Basically I copied the random number generator class from here:
https://prng.di.unimi.it/
I kept the method of generating floating point random numbers and modified some other things for my own use.
The mutate function basically is a hack for random + or - 2.exp(-c.rnd()) where rnd() returns random 0 to 1 and c is some positive constant. You can use it to provide mutations for parameters that are constrained between -1 and 1. That works well for evolving things.
There is a paper here:
http://www.cs.bham.ac.uk/~jer/papers/ctsgray.pdf

jeremydouglass · November 3, 2021, 2:46am

Ah – thanks for the link. It is Java notation for a hexidecimal floating point literal. That explains why it won’t compile in Processing 3.5.4 – the p suffix probably isn’t supported.

seanc4s · November 3, 2021, 8:15am

Yeh, I’m using the Processing version 4 beta which I think has move up to Java 11.
I prefer Java 2. Lol.

Anyway:
println(0x1.0p-24f);
Gives:
5.9604645E-8

println(0x1.0p-24f*0xffffff);
Gives: 0.99999994
0xffffff is 24 bits

println(5.9604645E-8f*0xffffff);
Also gives: 0.99999994
It should be safe to make the substitution.
I didn’t investigate the exact reasoning for the constant in the original code, I would have chosen a slightly faster conversion method myself. I think the CPU machine code instructions to convert long’s or int’s to floats are surprisingly fast, if I remember correctly. You could avoid the shift operator if you wanted and just multiply by a suitable constant, though there might be some slight statistical biases in the resulting random float.

seanc4s · November 3, 2021, 1:14pm

I’ve improved the code a little. Since the little width 4 neural layers provide 4 way connectivity themselves, you can lop a bit off the connecting WHTs and still have full connectivity. You still need a full WHT as the first step. To stop the first WHT from taking a spectrum (like the FFT spectrum meter you would see on a music player) you apply an arbitrary sub-random pattern of sign flips to the input data.
Then is is just a process of applying multiple width 4 layers, connecting those together with wht4() which is the shorten version of the WHT, more width 4 layers, connect with wht4 and so on.

// Multiple small width 4 neural network layers connected together using
// the fast Walsh Hadamard transform into a fully connected deep neural
// network.
final class SWNet4 {
  final int vecLen;
  final int depth;
  final float scale;
  final float[] params;
  final float[] flips;
  SWNet4(int vecLen, int depth) {
    this.vecLen = vecLen;
    this.depth = depth;
    scale = 2f / sqrt(vecLen);
    params = new float[8 * vecLen * depth];
    flips = new float[vecLen];
    for (int i = 0; i < vecLen; i++) {
      flips[i] = sin(i * 1.895) < 0 ? -0.5f*scale : 0.5f*scale;
    }
    for (int i = 0, j=0; i < params.length; i += 8) {
      params[i+j] = scale;
      params[i+j + 4] = scale;
      j=(j+1)&3;
    }
  }
  void recall(float[] result, float[] input) {
    for (int i = 0; i < vecLen; i++) {
      result[i] = input[i] * flips[i];
    }
    int paIdx = 0;
    wht(result);
    for (int i = 0; i < depth; i++) { 
      for (int j = 0; j < vecLen; j += 4) {
        float x, a, b, c, d;
        x= result[j];
        if (x < 0) {
          a = x * params[paIdx];
          b = x * params[paIdx + 1];
          c = x * params[paIdx + 2];
          d = x * params[paIdx + 3];
        } else {
          a = x * params[paIdx + 4];
          b = x * params[paIdx + 5];
          c = x * params[paIdx + 6];
          d = x * params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 1];
        if (x < 0) {
          a += x * params[paIdx];
          b += x * params[paIdx + 1];
          c += x * params[paIdx + 2];
          d += x * params[paIdx + 3];
        } else {
          a += x * params[paIdx + 4];
          b += x * params[paIdx + 5];
          c += x * params[paIdx + 6];
          d += x * params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 2];
        if (x < 0) {
          a += x * params[paIdx];
          b += x * params[paIdx + 1];
          c += x * params[paIdx + 2];
          d += x * params[paIdx + 3];
        } else {
          a += x * params[paIdx + 4];
          b += x * params[paIdx + 5];
          c += x * params[paIdx + 6];
          d += x * params[paIdx + 7];
        }
        paIdx += 8;
        x = result[j + 3];
        if (x < 0) {
          a += x * params[paIdx];
          b += x * params[paIdx + 1];
          c += x * params[paIdx + 2];
          d += x * params[paIdx + 3];
        } else {
          a += x * params[paIdx + 4];
          b += x * params[paIdx + 5];
          c += x * params[paIdx + 6];
          d += x * params[paIdx + 7];
        }
        paIdx += 8;
        result[j] = a;
        result[j + 1] = b;
        result[j + 2] = c;
        result[j + 3] = d;
      }
      wht4(result);
    }
  }
}
// The width 4 neural layers provide 4 way connectivity which means
// you only need do a partial Walsh Hadamard transform to get full
// connectivity, as provided by wht4().
void wht4(float[] vec){
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }  
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}  
// Full Fast Walsh Hadamard Transform
void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

// Sum of squared difference cost
float costL2(float[] vec, float[] tar) {
  float cost = 0;
  for (int i = 0; i < vec.length; i++) {
    float e = vec[i] - tar[i];
    cost += e * e;
  }
  return cost;
}

class Mutator {
  float[] previous;
  int[] pIdx;
  float limit;
  Rnd256 rng;

  Mutator(int size, float limit) {
    previous = new float[size];
    pIdx = new int[size];
    this.limit = limit;
    rng=new Rnd256();
  }
  void mutate(float[] vec) {
    for (int i = 0; i < previous.length; i++) {
      int rpos = rng.nextIntEx(vec.length);
      float v = vec[rpos];
      pIdx[i] = rpos;
      previous[i] = v;
      float m = limit * rng.mutate();
      float vm = v + m;
      if (vm >= this.limit) vm = v;
      if (vm <= -this.limit) vm = v;
      vec[rpos] = vm;
    }
  }
  void undo(float[] vec) {
    for (int i = previous.length - 1; i >= 0; i--) {
      vec[pIdx[i]] = previous[i];
    }
  }
}

final class Rnd256 {
  final long PHI = 0x9E3779B97F4A7C15L;
  private long s0, s1, s2, s3;
  Rnd256() {
    this(System.nanoTime());
  }
  Rnd256(long seed) {
    s0=staffordMix13(seed+PHI);
    s1=staffordMix13(seed+2*PHI);
    s2=staffordMix13(seed+3*PHI);
    s3=staffordMix13(seed+4*PHI);
  }
  long nextLong() {
    long result = s1;
    result = Long.rotateLeft(result + (result << 2), 7);
    result += result << 3;
    final long t = s1 << 17;
    s2 ^= s0;
    s3 ^= s1;
    s1 ^= s2;
    s0 ^= s3;
    s2 ^= t;
    s3 = Long.rotateLeft(s3, 45);
    return result;
  }
  float nextFloat() {
    return (nextLong() >>> 40) * 5.9604645E-8f;
  }
  boolean nextBoolean() {
    return nextLong() < 0;
  }
  int nextIntEx(int ex) {
    long r=(nextLong()>>>32)*ex;
    return (int)(r>>>32);
  }
  int nextIntInc(int inc) {
    long r=(nextLong()>>>32)*(inc+1L);
    return (int)(r>>>32);
  }
  float mutate() {
    int r=(int)nextLong();
    r &=0xbfff_ffff;
    r |=0x3000_0000;
    return Float.intBitsToFloat(r);
  }
  long staffordMix13(long z) {
    z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L;
    z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL;
    return z ^ (z >>> 31);
  }
}

// Test with Lissajous curves
color c1;
float[][] ex;
float[] work = new float[256];
float parentCost = Float.POSITIVE_INFINITY;
SWNet4 parentNet;
Mutator mut;

void setup() {
  size(400, 400);
  parentNet = new SWNet4(256, 4);
  mut = new Mutator(10, parentNet.scale);
  c1 = color(0xff,0xd7,00);
  ex=new float[8][256];
  for (int i = 0; i < 127; i++) {
    // Training data
    float t = (i * 2 * PI) / 127;
    ex[0][2 * i] = sin(t);
    ex[0][2 * i + 1] = sin(2 * t);
    ex[1][2 * i] = sin(2 * t);
    ex[1][2 * i + 1] = sin(t);
    ex[2][2 * i] = sin(2 * t);
    ex[2][2 * i + 1] = sin(3 * t);
    ex[3][2 * i] = sin(3 * t);
    ex[3][2 * i + 1] = sin(2 * t);
    ex[4][2 * i] = sin(3 * t);
    ex[4][2 * i + 1] = sin(4 * t);
    ex[5][2 * i] = sin(4 * t);
    ex[5][2 * i + 1] = sin(3 * t);
    ex[6][2 * i] = sin(2 * t);
    ex[6][2 * i + 1] = sin(5 * t);
    ex[7][2 * i] = sin(5 * t);
    ex[7][2 * i + 1] = sin(2 * t);
  }
  textSize(16);
}

void draw() {
  background(0);
  loadPixels();
  for (int i = 0; i < 300; i++) {
    mut.mutate(parentNet.params);
    float cost = 0;
    for (int j = 0; j < 8; j++) {
      parentNet.recall(work, ex[j]);
      cost += costL2(work, ex[j]); // autoassociate
    }
    if (cost < parentCost) {
      parentCost = cost;
    } else {
      mut.undo(parentNet.params);
    }
  }
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 + int(18 * ex[i][j]), 44 + int(18 * ex[i][j + 1]),c1);
    }
  }
  for (int i = 0; i < 8; i++) {
    parentNet.recall(work, ex[i]);
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 +int( 18 * work[j]), 104 + int(18 * work[j + 1]),c1);
    }
  }
  //updatePixels();
  text("Training Data", 5, 20);
  text("Autoassociative recall", 5, 80);
  text("Iterations: " + frameCount, 5, 150);
  text("Cost: " + parentCost, 5, 170);
}

seanc4s · November 4, 2021, 1:35am

The central limit theorem says that if you add together a bunch of random numbers they will take on the Gaussian/Normal/Bell Curve distribution.
It also applies to where you add and subtract a bunch of random numbers, which is what the Walsh Hadamard transform does. There is a slight degree of entanglement between the Gaussian numbers generated because the WHT leaves vector length unchanged or only scaled by a constant, depending on how you set it up.

void setup(){
  size(600,300);
  background(0);
  Rnd256 rnd=new Rnd256();
  float[] u=new float[65536];
  float[] g=new float[65536];
  for(int i=0;i<65535;i++){
    u[i]=g[i]=2f*rnd.nextFloat()-1f;
   // u[i]=g[i]=random(-1f,1f); // Shows poor performance of random(...)
  }
  wht(g);
  for(int i=0;i<65536;i+=2){
    set(140+int(u[i]*130f),140+int(u[i+1]*130f),0xffffd700);
    set(440+int(g[i]*0.25f),140+int(g[i+1]*0.25f),0xffffd700);
  }
  noLoop();
}  

void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

final class Rnd256 {
  final long PHI = 0x9E3779B97F4A7C15L;
  private long s0, s1, s2, s3;
  Rnd256() {
    this(System.nanoTime());
  }
  Rnd256(long seed) {
    s0=staffordMix13(seed+PHI);
    s1=staffordMix13(seed+2*PHI);
    s2=staffordMix13(seed+3*PHI);
    s3=staffordMix13(seed+4*PHI);
  }
  long nextLong() {
    long result = s1;
    result = Long.rotateLeft(result + (result << 2), 7);
    result += result << 3;
    final long t = s1 << 17;
    s2 ^= s0;
    s3 ^= s1;
    s1 ^= s2;
    s0 ^= s3;
    s2 ^= t;
    s3 = Long.rotateLeft(s3, 45);
    return result;
  }
  float nextFloat() {
    return (nextLong() >>> 40) * 5.9604645E-8f;
  }
  boolean nextBoolean() {
    return nextLong() < 0;
  }
  int nextIntEx(int ex) {
    long r=(nextLong()>>>32)*ex;
    return (int)(r>>>32);
  }
  int nextIntInc(int inc) {
    long r=(nextLong()>>>32)*(inc+1L);
    return (int)(r>>>32);
  }
  float mutate() {
    int r=(int)nextLong();
    r &=0xbfff_ffff;
    r |=0x3000_0000;
    return Float.intBitsToFloat(r);
  }
  long staffordMix13(long z) {
    z = (z ^ (z >>> 30)) * 0xBF58476D1CE4E5B9L;
    z = (z ^ (z >>> 27)) * 0x94D049BB133111EBL;
    return z ^ (z >>> 31);
  }
}

I had to use an external random number generator because the standard one in Java is maybe not the best and you get artifacts.
Anyway natural data transformed by the WHT gives a biased spectrum like response, that you can use for data compression even. However disordering the natural data a bit gives a more Gaussian distribution response.

seanc4s · November 4, 2021, 11:47am

Image compression using the fast Walsh Hadamard transform.
You transform the image data. Select the top-k highest magnitude outputs of the transform and store them. To decompress place the selected outputs back in their original position in an otherwise zeroed array. Then do the WHT again (it is self-inverse) to get an approximation of the original image.
I used positive only numbers for the image pixels, it is probably better to use ± pixel values centered around zero.
For some applications of the WHT you don’t actually want information concentrating into a few dimensions, which allows compression. In those cases you can use prior random sign flipping of the input information or random permutations. That way you can avoid the concentrating effect.

PImage img;

void setup() {
  size(522, 265);
  background(0);
  int compressNumber=6553;
  float[] data=new float[65536];
  float[] decompressData=new float[65536];
  Item[] itm=new Item[65536];
  img = loadImage("https://www.culture-et-formation.fr/wp-content/uploads/2010/01/cat-256x2564.png");
  for (int i=0; i<65536; i++) {
    int c=img.get(i&255, i>>8);
    float bw=0.3333*(red(c)+green(c)+blue(c));
    data[i]=bw;
    set(i&255, i>>8, color((int)bw));
  }
  //
  wht(data);
  for (int i=0; i<65536; i++) {
    itm[i]=new Item(i, data[i]);
  }
  java.util.Arrays.sort(itm); // find the highest magnitude spectral components
  float scale=1f/65536f; //WHT scaling factor twice
  for (int i=0; i<compressNumber; i++) {
    decompressData[itm[i].pos]=itm[i].val*scale;// place highest magnitude items
  }
  wht(decompressData); // Do the decompression
  for (int i=0; i<65536; i++) {
    set(265+(i&255), i>>8, color(constrain(int(decompressData[i]),0,255)));
  }
  noLoop();
}
class Item implements Comparable {
  int pos;
  float val;
  Item(int pos, float val) {
    this.pos=pos;
    this.val=val;
  }
  int compareTo(Object x) {  // allow sort by magnitude
    float a=abs(val);
    float b=abs(((Item)x).val);
    if (a==b) return 0;
    return a<b? 1:-11;
  }
}

void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

seanc4s · November 4, 2021, 12:07pm

Timing for the WHT function:

void setup() {
  float[] x=new float[65536];
  long t1=System.nanoTime();
  for (int i=0; i<10000; i++) {
    wht(x);
  }
  long t2=System.nanoTime();
  float sec=(t2-t1)/1000000000f;
  println("Seconds: "+sec);
  println("Number of 65536-point WHTs per second: "+10000/sec);
}

void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

I get 1400 65536-point WHTs per second on very weak hardware. There is always the FFHT library that you could write a JNI interface for. For GPUs I don’t know.

seanc4s · November 5, 2021, 12:42am

Completely disordering the input to a WHT with a fixed pattern of random sign flips or a random permutation tends to produce an output with the Gaussian random distribution. A random projection of the input data.
For dense input data like natural images a single application is enough to get a good random projection, for sparse input data, that is mostly zeros for example, a second application is needed.
The random projections are invertible because the WHT, sign-flipping and permutations are invertible.
You can use random projections for dimensional increase or decrease.
In particular it is useful for ‘fair’ sub-sampling of some input data, where all the different frequency components are sampled equally.
If you invert a particular sub-sample it looks like you have lost a lot of information. However, since the sub-sample is ‘fair’ there are actually mathematical ways (compressive sensing) of restoring from the sub-sample quite well.
If you binarize the output of a random projection you get a locality sensitive hash (LSH.)
With a normal hash algorithm the slightest change in the input produces a total change in the output. With a LSH a small change in the input produces a small change in the output, while still being a hashing algorithm.

PImage img;

void setup() {
  size(522, 522);
  background(0);
  int subSample=6553;  // Size of random projection subsample
  float[] data=new float[65536];
  float[] restoredData=new float[65536];
  float[] signFlips=new float[65536]; // Random sign flips to disorder WHT
  for(int i=0;i<65536;i++){
    signFlips[i]=random(-1,1)<0? -1:1;
  }  
  img = loadImage("https://www.culture-et-formation.fr/wp-content/uploads/2010/01/cat-256x2564.png");
  for (int i=0; i<65536; i++) {
    int c=img.get(i&255, i>>8);
    float bw=0.3333*(red(c)+green(c)+blue(c));
    data[i]=bw;
    set(i&255, i>>8, color((int)bw));
  }
  //
  for(int i=0;i<65536;i++){
    data[i]*=signFlips[i];  // Random Pattern of sign flips...
  }
  wht(data); // ... followed by a WHT gives a random projection
  for(int i=0;i<subSample;i++){
    restoredData[i]=data[i];  // Get a subsample of the random projection
  }
  wht(restoredData); // Invert the random projection process on the subsample
  for(int i=0;i<65536;i++){
    restoredData[i]*=signFlips[i]; 
  }
  for (int i=0; i<65536; i++) {
    set(265+(i&255), i>>8, color(constrain(int(data[i]*0.01f),0,255)));
    set(i&255,265+( i>>8), color(constrain(int(restoredData[i]),0,255)));
  }
  noLoop();
}

void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

seanc4s · November 6, 2021, 4:46am

Restoring an image from a random projection sub-sample.
When you look at the simple inverse of a random projection sub-sample of an image, you would think you had lost a lot of information. However higher dimensional space is a funny place and actually the sub-sample contains a lot of the original information, which you can bring out in various ways.
The complicated ways involve theories from compressive sensing.
The easy way involves smoothing with a binomial filter.

PImage img;
float[] subsample=new float[6553];
float[] restoredData=new float[65536];
float[] buffer=new float[65536];
float[] signFlips=new float[65536]; // Random sign flips to disorder WHT
void setup() {
  size(512, 256);
  float[] data=new float[65536];
  for (int i=0; i<65536; i++) {
    signFlips[i]=random(-1, 1)<0? -1:1;
  }
  img = loadImage("http://www.roomelegance.com/wp-content/uploads/2012/04/English-Country-Houses-2-English-Village-Home.jpg");
  for (int i=0; i<65536; i++) {
    int c=img.get(i&255, i>>8);
    float bw=0.3333*(red(c)+green(c)+blue(c));
    set(i&255,i>>8,color(bw));
    data[i]=bw;
  }
  //
  for (int i=0; i<65536; i++) {
    data[i]*=signFlips[i];  // Random Pattern of sign flips...
  }
  wht(data); // ... followed by a WHT gives a random projection
  for (int i=0; i<subsample.length; i++) {
    subsample[i]=data[i]*1f/256f; // Get a subsample of the random projection
  }
}
void draw() {
  //background(100);
  for(int i=0;i<subsample.length;i++){
    restoredData[i]=subsample[i]; // force in correct values
  }
  wht(restoredData); // Invert the random projection
  for (int i=0; i<65536; i++) {
    restoredData[i]*=signFlips[i]*1f/256f;
  }
  binomialFilter256(restoredData,buffer);
  for (int i=0; i<65536; i++) {
    int x=i&255;
    int y=i>>8;
    set(x+256,y,color(constrain(restoredData[i],0,255)));
  }
  for (int i=0; i<65536; i++) {
    restoredData[i]=signFlips[i]*restoredData[i]*1f/256f;
  }
  wht(restoredData);
}

void binomialFilter256(float[] img,float[] buffer) {
  for (int y=0; y<256; y++) {
    int idx=y*256;
    float a;
    float b=img[idx];
    float c=b;
    for (int x=0; x<255; x++) {
      a=b;
      b=c;
      c=img[idx+x+1];
      buffer[idx+x]=0.25f*a+0.5f*b+0.25f*c;
    }
    buffer[idx+255]=0.25*b+0.75f*c;
  }
  for (int x=0; x<256; x++) {
    float a;
    float b=img[x];
    float c=b;
    for (int y=0; y<255; y++) {
      int idx=y*256;
      a=b;
      b=c;
      c=buffer[idx+x+256];
      img[idx+x]=0.25f*a+0.5f*b+0.25f*c;
    }
    img[255*256+x]=0.25*b+0.75f*c;
  }
}

void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

seanc4s · November 9, 2021, 3:11pm

Some links for networks based on fast transforms:
https://www.kdnuggets.com/2021/07/wht-simpler-fast-fourier-transform-fft.html
https://discuss.neuralmagic.com/t/fast-transforms-for-sparsity/41
https://discuss.neuralmagic.com/t/going-beyond-relu/55
https://editor.p5js.org/seanhaddps/sketches/ERaMwFcej
https://editor.p5js.org/seanhaddps/sketches/e8M3UO71-
https://archive.org/details/activation-weight-switching
https://archive.org/details/afrozenneuralnetwork
https://archive.org/details/zero-curvatue

seanc4s · December 28, 2021, 10:54am

You are not going to get any money for technical writing these days. Twenty years ago you could still get $500 for a magazine article.
Markets change. That’s just the way it is. Lol.
I put together this booklet on the Walsh Hadamard transform if anyone is curious:
https://archive.org/details/whtebook-archive

seanc4s · December 28, 2021, 11:01am

Also a second glance at the weighted sum as used in neural networks:
https://archive.org/details/the-weighted-sum

seanc4s · December 28, 2021, 11:11am

You can use this website for free images of the correct size for some of the examples above where the image url has become outdated:
https://picsum.photos/
Eg:
https://picsum.photos/256

seanc4s · March 29, 2022, 2:47am

Transformer neural networks seem to be all the rage. “Attention is all you need.” is the catchphrase apparently. I thought I’d give it a try.


final class SWNet4trfm {
  final int vecLen;  // Vector size must be a power of 2 like 16,32,64,etc
  final int depth;   // Number of neural layers
  final int trfm;    // Number of transformer (attention) stages
  final float scale;
  final float[] params;
  final float[] flips;
  SWNet4trfm(int vecLen, int depth, int trfm) {
    this.vecLen = vecLen;
    this.depth = depth;
    this.trfm=trfm;
    scale = 2f / sqrt(vecLen);
    params = new float[8 * vecLen * depth*trfm];
    flips = new float[vecLen];
    for (int i = 0; i < vecLen; i++) {
      flips[i] = sin(i * 1.895) < 0 ? -0.5f*scale : 0.5f*scale;
    }
    for (int i = 0, j=0; i < params.length; i += 8) {
      params[i+j] = scale;
      params[i+j + 4] = scale;
      j=(j+1)&3;
    }
  }
  void recall(float[] result, float[] input) {
    for (int i = 0; i < vecLen; i++) {
      result[i] = input[i] * flips[i];
    }
    int paIdx = 0;
    wht(result);
    for (int t = 1; t <= trfm; t++) {
      for (int i = 0; i < depth; i++) {
        for (int j = 0; j < vecLen; j += 4) {
          float x, a, b, c, d;
          x= result[j];
          if (x < 0) {
            a = x * params[paIdx];
            b = x * params[paIdx + 1];
            c = x * params[paIdx + 2];
            d = x * params[paIdx + 3];
          } else {
            a = x * params[paIdx + 4];
            b = x * params[paIdx + 5];
            c = x * params[paIdx + 6];
            d = x * params[paIdx + 7];
          }
          paIdx += 8;
          x = result[j + 1];
          if (x < 0) {
            a += x * params[paIdx];
            b += x * params[paIdx + 1];
            c += x * params[paIdx + 2];
            d += x * params[paIdx + 3];
          } else {
            a += x * params[paIdx + 4];
            b += x * params[paIdx + 5];
            c += x * params[paIdx + 6];
            d += x * params[paIdx + 7];
          }
          paIdx += 8;
          x = result[j + 2];
          if (x < 0) {
            a += x * params[paIdx];
            b += x * params[paIdx + 1];
            c += x * params[paIdx + 2];
            d += x * params[paIdx + 3];
          } else {
            a += x * params[paIdx + 4];
            b += x * params[paIdx + 5];
            c += x * params[paIdx + 6];
            d += x * params[paIdx + 7];
          }
          paIdx += 8;
          x = result[j + 3];
          if (x < 0) {
            a += x * params[paIdx];
            b += x * params[paIdx + 1];
            c += x * params[paIdx + 2];
            d += x * params[paIdx + 3];
          } else {
            a += x * params[paIdx + 4];
            b += x * params[paIdx + 5];
            c += x * params[paIdx + 6];
            d += x * params[paIdx + 7];
          }
          paIdx += 8;
          result[j] = a;
          result[j + 1] = b;
          result[j + 2] = c;
          result[j + 3] = d;
        }
        wht4(result);
      }
      if (t!=trfm) {
        for (int i=0; i<vecLen; i++) {
          result[i]*=input[i];
        }
      }
    }
  }
}
// The width 4 neural layers provide 4 way connectivity which means
// you only need do a partial Walsh Hadamard transform to get full
// connectivity.
void wht4(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}
// Full Fast Walsh Hadamard Transform
void wht(float[] vec) {
  final int n = vec.length;
  for (int i=0; i<n; i+=8) {
    float a=vec[i];
    float b=vec[i+1];
    float c=vec[i+2];
    float d=vec[i+3];
    float e=vec[i+4];
    float f=vec[i+5];
    float g=vec[i+6];
    float h=vec[i+7];
    float t=a;
    a=a+b;
    b=t-b;
    t=c;
    c=c+d;
    d=t-d;
    t=a;
    a=a+c;
    c=t-c;
    t=b;
    b=b+d;
    d=t-d;
    t=e;
    e=e+f;
    f=t-f;
    t=g;
    g=g+h;
    h=t-h;
    t=e;
    e=e+g;
    g=t-g;
    t=f;
    f=f+h;
    h=t-h;
    t=a;
    a=a+e;
    e=t-e;
    t=b;
    b=b+f;
    f=t-f;
    t=c;
    c=c+g;
    g=t-g;
    t=d;
    d=d+h;
    h=t-h;
    vec[i]=a;
    vec[i+1]=b;
    vec[i+2]=c;
    vec[i+3]=d;
    vec[i+4]=e;
    vec[i+5]=f;
    vec[i+6]=g;
    vec[i+7]=h;
  }
  int hs = 8;
  while (hs < n) {
    int i = 0;
    while (i < n) {
      final int j = i + hs;  // final here is good hint to hotspot
      while (i < j) {
        float a = vec[i];
        float b = vec[i + hs];
        vec[i] = a + b;
        vec[i + hs] = a - b;
        i += 1;
      }
      i += hs;
    }
    hs += hs;
  }
}

// Sum of squared difference cost
float costL2(float[] vec, float[] tar) {
  float cost = 0;
  for (int i = 0; i < vec.length; i++) {
    float e = vec[i] - tar[i];
    cost += e * e;
  }
  return cost;
}

class Mutator {
  float[] previous;
  int[] pIdx;
  float limit;
  float noise;
  Mutator(int size, float limit, float noise) {
    previous = new float[size];
    pIdx = new int[size];
    this.limit = limit;
    this.noise=noise;
  }
  void mutate(float[] vec) {
    for (int i = 0; i < previous.length; i++) {
      int rpos = int(random(vec.length));
      float v = vec[rpos];
      pIdx[i] = rpos;
      previous[i] = v;
      float m = random(-limit *noise, limit *noise);
      float vm = v + m;
      if (vm >= this.limit) vm = v;
      if (vm <= -this.limit) vm = v;
      vec[rpos] = vm;
    }
  }
  void undo(float[] vec) {
    for (int i = previous.length - 1; i >= 0; i--) {
      vec[pIdx[i]] = previous[i];
    }
  }
}


// Test with Lissajous curves
color c1;
float[][] ex;
float[] work = new float[256];
float parentCost = Float.POSITIVE_INFINITY;
SWNet4trfm parentNet;
Mutator mut;

void setup() {
  size(400, 400);
  parentNet = new SWNet4trfm(256, 2, 2);
  mut = new Mutator(15, parentNet.scale, 0.01f);
  c1 = color(0xff, 0xd7, 00);
  ex=new float[8][256];
  for (int i = 0; i < 127; i++) {
    // Training data
    float t = (i * 2 * PI) / 127;
    ex[0][2 * i] = sin(t);
    ex[0][2 * i + 1] = sin(2 * t);
    ex[1][2 * i] = sin(2 * t);
    ex[1][2 * i + 1] = sin(t);
    ex[2][2 * i] = sin(2 * t);
    ex[2][2 * i + 1] = sin(3 * t);
    ex[3][2 * i] = sin(3 * t);
    ex[3][2 * i + 1] = sin(2 * t);
    ex[4][2 * i] = sin(3 * t);
    ex[4][2 * i + 1] = sin(4 * t);
    ex[5][2 * i] = sin(4 * t);
    ex[5][2 * i + 1] = sin(3 * t);
    ex[6][2 * i] = sin(2 * t);
    ex[6][2 * i + 1] = sin(5 * t);
    ex[7][2 * i] = sin(5 * t);
    ex[7][2 * i + 1] = sin(2 * t);
  }
  textSize(16);
}

void draw() {
  background(0);
  loadPixels();
  for (int i = 0; i < 100; i++) {
    mut.mutate(parentNet.params);
    float cost = 0;
    for (int j = 0; j < 8; j++) {
      parentNet.recall(work, ex[j]);
      cost += costL2(work, ex[j]); // autoassociate
    }
    if (cost < parentCost) {
      parentCost = cost;
    } else {
      mut.undo(parentNet.params);
    }
  }
  for (int i = 0; i < 8; i++) {
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 + int(18 * ex[i][j]), 44 + int(18 * ex[i][j + 1]), c1);
    }
  }
  for (int i = 0; i < 8; i++) {
    parentNet.recall(work, ex[i]);
    for (int j = 0; j < 255; j += 2) {
      set(25 + i * 40 +int( 18 * work[j]), 104 + int(18 * work[j + 1]), c1);
    }
  }
  text("Training Data", 5, 20);
  text("Autoassociative recall", 5, 80);
  text("Iterations: " + frameCount, 5, 150);
  text("Cost: " + parentCost, 5, 170);
}

That is only a simple example, I have other code here that works on images:
https://archive.org/details/swnet4transformer
I’ve only experimented with it a tiny amount but it seems nice.

Topic		Replies	Views
SwitchNet Neural network examples Gallery	9	175	April 3, 2025
Flaw in current neural networks Gallery	10	2087	June 29, 2019
Fixed filter bank neural networks Gallery	1	643	August 18, 2019
"Wave Collapse Function" algorithm in Processing Gallery	18	15582	June 3, 2022
Image compression using the Walsh Hadamard transform (for fun.) Gallery	0	195	April 2, 2023

Switch Net 4 - Neural Network

Related topics