To better understand what is happening, I started by working on the UTF-8 encoding and decoding part of this task.
The task is :
My working stage is:
There I ran into problems again, because the last character is also a combined character.
I can't believe this isn't possible in Java. It must be possible at the byte level.
import java.nio.charset.StandardCharsets;
import java.util.Formatter;
// Baseline (y coordinate) for table rows; it starts at the separator line and
// is advanced by 25 before each row is drawn.
int t = 50;
// Row counter; left at its default of 0 and advanced once per row drawn.
int tel;
// Glyphs to display, one per table row. The last entry is a '?' placeholder:
// the Musical Symbol G Clef is U+1D11E, which lies outside the Basic
// Multilingual Plane and therefore does not fit in a single char. A
// backslash-u escape takes exactly four hex digits, so writing 1D11E after it
// is read as U+1D11 followed by the letter E, i.e. two characters, and the
// compiler reports "Invalid character constant".
Character[] chars = {'A', 'ö', 'Ж', '€', '?'};
/**
 * Draws a table with one row per sample code point: the glyph, its Unicode
 * name, its "U+XXXX" notation and its UTF-8 bytes in hex.
 *
 * Every column is derived from the code point itself, so code points above
 * U+FFFF (which need two UTF-16 chars) are handled like any other. The decoded
 * value of each encoding is printed to the console as a round-trip check.
 */
void setup() {
  size(740, 200);
  background(255);
  fill(0);
  textSize(15);
  text("Character Name Unicode UTF-8 encoding (hex)", 25, 30);
  text("-------------------------------------------------------------------------------", 25, 50);

  // The last entry is U+1D11E (MUSICAL SYMBOL G CLEF). It must be written with
  // five hex digits; 0x1D11 is a different character in the BMP.
  int[] codepoints = {0x0041, 0x00F6, 0x0416, 0x20AC, 0x1D11E};

  for (int codepoint : codepoints) {
    byte[] encoded = utf8encode(codepoint);

    // Two upper-case hex digits per byte, each followed by a space.
    StringBuilder encodedHex = new StringBuilder();
    for (byte b : encoded) {
      encodedHex.append(String.format("%02X ", b));
    }

    // Round-trip check: this should print the same value as codepoint.
    int decoded = utf8decode(encoded);
    println(decoded);

    // Character.toChars yields one char for BMP code points and a surrogate
    // pair for supplementary ones, so the String always holds the full glyph.
    // NOTE(review): whether the glyph is actually rendered depends on the
    // font in use having it - confirm for U+1D11E.
    String glyph = new String(Character.toChars(codepoint));

    t += 25;
    text(glyph, 30, t);
    text(Character.getName(codepoint), 130, t);
    // At least four hex digits, more when the code point needs them.
    text(String.format("U+%04X", codepoint), 450, t);
    text(encodedHex.toString(), 550, t);

    // Keep the global row counter in step with the rows drawn.
    tel++;
  }
}
/**
 * Encodes a single Unicode code point as UTF-8.
 *
 * @param codepoint the code point to encode
 * @return the UTF-8 byte sequence for that code point
 */
final byte[] utf8encode(int codepoint) {
  // Wrap the code point in a one-element array so the code-point based
  // String constructor can build the (one or two char) string for it.
  int[] single = {codepoint};
  String asText = new String(single, 0, single.length);
  return asText.getBytes(StandardCharsets.UTF_8);
}
/**
 * Decodes UTF-8 bytes and returns the first code point of the result.
 *
 * @param bytes UTF-8 encoded data
 * @return the code point at index 0 of the decoded text
 */
int utf8decode(byte[] bytes) {
  String decodedText = new String(bytes, StandardCharsets.UTF_8);
  // codePointAt joins a surrogate pair at index 0 into one code point.
  return decodedText.codePointAt(0);
}