diff --git a/core/java/src/net/i2p/data/Base64.java b/core/java/src/net/i2p/data/Base64.java index 8fb72e1c0af567e3ab1ca267fcdd3b500888f8fb..578dadda2526f4d8906efdf99fa5dbac497ee24b 100644 --- a/core/java/src/net/i2p/data/Base64.java +++ b/core/java/src/net/i2p/data/Base64.java @@ -42,13 +42,17 @@ public class Base64 { //private final static Log _log = new Log(Base64.class); /** + * Output will be a multiple of 4 chars, including 0-2 trailing '=' + * As of 0.9.14, encodes the UTF-8 encoding of source. Prior to that, used the platform's encoding. + * * @param source if null will return "" */ public static String encode(String source) { - return (source != null ? encode(source.getBytes()) : ""); + return (source != null ? encode(DataHelper.getUTF8(source)) : ""); } /** + * Output will be a multiple of 4 chars, including 0-2 trailing '=' * @param source if null will return "" */ public static String encode(byte[] source) { @@ -56,6 +60,7 @@ public class Base64 { } /** + * Output will be a multiple of 4 chars, including 0-2 trailing '=' * @param source if null will return "" */ public static String encode(byte[] source, int off, int len) { @@ -63,6 +68,7 @@ public class Base64 { } /** + * Output will be a multiple of 4 chars, including 0-2 trailing '=' * @param source if null will return "" * @param useStandardAlphabet Warning, must be false for I2P compatibility */ @@ -71,6 +77,7 @@ public class Base64 { } /** + * Output will be a multiple of 4 chars, including 0-2 trailing '=' * @param source if null will return "" * @param useStandardAlphabet Warning, must be false for I2P compatibility */ @@ -79,7 +86,17 @@ public class Base64 { } /** - * Decodes data from Base64 notation. + * Decodes data from Base64 notation using the I2P alphabet. + * + * As of 0.9.14, does not require trailing '=' if remaining bits are zero. + * Prior to that, trailing 1, 2, or 3 chars were ignored. + * + * As of 0.9.14, trailing garbage after an '=' will cause an error. + * Prior to that, it was ignored. + * + * As of 0.9.14, whitespace will cause an error. + * Prior to that, it was ignored. + * * @param s Base 64 encoded string using the I2P alphabet A-Z, a-z, 0-9, -, ~ * @return the decoded data, null on error */ @@ -122,6 +139,7 @@ public class Base64 { /** * Translates a Base64 value to either its 6-bit reconstruction value * or a negative number indicating some other meaning. + * As of 0.9.14 this is the decoding for the I2P alphabet. See safeDecode(). **/ private final static byte[] DECODABET = { -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 0 - 8 -5, -5, // Whitespace: Tab and Linefeed @@ -131,9 +149,8 @@ public class Base64 { -9, -9, -9, -9, -9, // Decimal 27 - 31 -5, // Whitespace: Space -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 33 - 42 - 62, // Plus sign at decimal 43 - -9, -9, -9, // Decimal 44 - 46 - 63, // Slash at decimal 47 + //62, -9, -9, -9, 63, // + , - . / (43-47) NON-I2P + -9, -9, 62, -9, -9, // + , - . / (43-47) I2P 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // Numbers zero through nine -9, -9, -9, // Decimal 58 - 60 -1, // Equals sign at decimal 61 @@ -143,8 +160,9 @@ public class Base64 { -9, -9, -9, -9, -9, -9, // Decimal 91 - 96 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, // Letters 'a' through 'm' 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // Letters 'n' through 'z' - -9, -9, -9, -9 // Decimal 123 - 126 - /*,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 127 - 139 + //-9, -9, -9, -9 // Decimal 123 - 126 (126 is '~') NON-I2P + -9, -9, -9, 63 // Decimal 123 - 126 (126 is '~') I2P + ,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 127 - 139 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 140 - 152 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 153 - 165 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 166 - 178 @@ -153,9 +171,10 @@ public class Base64 { -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 205 - 217 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 218 - 230 -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9, // Decimal 231 - 243 - -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9 // Decimal 244 - 255 */ + -9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9,-9 // Decimal 244 - 255 }; + private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding private final static byte EQUALS_SIGN_ENC = -1; // Indicates equals sign in encoding @@ -167,7 +186,6 @@ public class Base64 { //test(); if (args.length == 0) { help(); - return; } runApp(args); } @@ -175,22 +193,36 @@ public class Base64 { private static void runApp(String args[]) { String cmd = args[0].toLowerCase(Locale.US); if ("encodestring".equals(cmd)) { - System.out.println(encode(args[1].getBytes())); + if (args.length != 2) + help(); + System.out.println(encode(DataHelper.getUTF8(args[1]))); return; } if ("decodestring".equals(cmd)) { + if (args.length != 2) + help(); byte[] dec = decode(args[1]); if (dec != null) { try { System.out.write(dec); } catch (IOException ioe) { System.err.println("output error " + ioe); + System.exit(1); } } else { System.err.println("decode error"); + System.exit(1); } return; } + if ("test".equals(cmd)) { + System.err.println("test disabled"); + System.exit(1); + } + if (!("encode".equals(cmd) || "decode".equals(cmd))) { + System.err.println("unknown command " + cmd); + System.exit(1); + } InputStream in = System.in; OutputStream out = System.out; try { @@ -202,11 +234,8 @@ public class Base64 { } if ("encode".equals(cmd)) { encode(in, out); - return; - } - if ("decode".equals(cmd)) { + } else { decode(in, out); - return; } } catch (IOException ioe) { ioe.printStackTrace(System.err); @@ -238,16 +267,18 @@ public class Base64 { out.write(decoded); } + /** exits 1, never returns */ private static void help() { - System.out.println("Syntax: Base64 encode <inFile> <outFile>"); - System.out.println("or : Base64 encode <inFile>"); - System.out.println("or : Base64 encode"); - System.out.println("or : Base64 decode <inFile> <outFile>"); - System.out.println("or : Base64 decode <inFile>"); - System.out.println("or : Base64 decode"); - System.out.println("or : Base64 encodestring 'string to encode'"); - System.out.println("or : Base64 decodestring 'string to decode'"); - System.out.println("or : Base64 test"); + System.err.println("Usage: Base64 encode <inFile> <outFile>"); + System.err.println(" Base64 encode <inFile>"); + System.err.println(" Base64 encode (stdin to stdout)"); + System.err.println(" Base64 decode <inFile> <outFile>"); + System.err.println(" Base64 decode <inFile>"); + System.err.println(" Base64 decode (stdin to stdout)"); + System.err.println(" Base64 encodestring 'string to encode'"); + System.err.println(" Base64 decodestring 'string to decode'"); + System.err.println(" Base64 test"); + System.exit(1); } /******* @@ -458,12 +489,15 @@ public class Base64 { */ private static byte[] safeDecode(String source, boolean useStandardAlphabet) { if (source == null) return null; - String toDecode = null; + String toDecode; if (useStandardAlphabet) { - toDecode = source; + //toDecode = source; + toDecode = source.replace('/', '~'); + toDecode = toDecode.replace('+', '-'); } else { - toDecode = source.replace('~', '/'); - toDecode = toDecode.replace('-', '+'); + //toDecode = source.replace('~', '/'); + //toDecode = toDecode.replace('-', '+'); + toDecode = source; } return standardDecode(toDecode); } @@ -600,76 +634,75 @@ public class Base64 { * @param srcOffset the index where conversion begins * @param destination the array to hold the conversion * @param destOffset the index where output will be put - * @return the number of decoded bytes converted + * @return the number of decoded bytes converted 1-3, or -1 on error, never zero * @since 1.3 */ private static int decode4to3(byte[] source, int srcOffset, byte[] destination, int destOffset) { + byte decode0 = DECODABET[source[srcOffset++]]; + byte decode1 = DECODABET[source[srcOffset++]]; + if (decode0 < 0 || decode1 < 0) + return -1; + // Example: Dk== - if (source[srcOffset + 2] == EQUALS_SIGN) { - // Two ways to do the same thing. Don't know which way I like best. - //int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) - // | ( ( DECODABET[ source[ srcOffset + 1] ] << 24 ) >>> 12 ); - int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) - | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12); - - destination[destOffset] = (byte) (outBuff >>> 16); + if (source[srcOffset] == EQUALS_SIGN) { + if (source[srcOffset + 1] != EQUALS_SIGN) + return -1; + // verify no extra bits + if ((decode1 & 0x0f) != 0) + return -1; + int outBuff = (decode0 << 18) + | (decode1 << 12); + destination[destOffset] = (byte) (outBuff >> 16); return 1; } // Example: DkL= - else if (source[srcOffset + 3] == EQUALS_SIGN) { - // Two ways to do the same thing. Don't know which way I like best. - //int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) - // | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 ) - // | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 ); - int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) - | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12) - | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6); - - destination[destOffset] = (byte) (outBuff >>> 16); - destination[destOffset + 1] = (byte) (outBuff >>> 8); + else if (source[srcOffset + 1] == EQUALS_SIGN) { + byte decode2 = DECODABET[source[srcOffset]]; + if (decode2 < 0) + return -1; + // verify no extra bits + if ((decode2 & 0x03) != 0) + return -1; + int outBuff = (decode0 << 18) + | (decode1 << 12) + | (decode2 << 6); + destination[destOffset++] = (byte) (outBuff >> 16); + destination[destOffset] = (byte) (outBuff >> 8); return 2; } // Example: DkLE else { - try { - // Two ways to do the same thing. Don't know which way I like best. - //int outBuff = ( ( DECODABET[ source[ srcOffset ] ] << 24 ) >>> 6 ) - // | ( ( DECODABET[ source[ srcOffset + 1 ] ] << 24 ) >>> 12 ) - // | ( ( DECODABET[ source[ srcOffset + 2 ] ] << 24 ) >>> 18 ) - // | ( ( DECODABET[ source[ srcOffset + 3 ] ] << 24 ) >>> 24 ); - int outBuff = ((DECODABET[source[srcOffset]] & 0xFF) << 18) - | ((DECODABET[source[srcOffset + 1]] & 0xFF) << 12) - | ((DECODABET[source[srcOffset + 2]] & 0xFF) << 6) - | ((DECODABET[source[srcOffset + 3]] & 0xFF)); - - destination[destOffset] = (byte) (outBuff >> 16); - destination[destOffset + 1] = (byte) (outBuff >> 8); - destination[destOffset + 2] = (byte) (outBuff); - - return 3; - } catch (Exception e) { - System.out.println("" + source[srcOffset] + ": " + (DECODABET[source[srcOffset]])); - System.out.println("" + source[srcOffset + 1] + ": " + (DECODABET[source[srcOffset + 1]])); - System.out.println("" + source[srcOffset + 2] + ": " + (DECODABET[source[srcOffset + 2]])); - System.out.println("" + source[srcOffset + 3] + ": " + (DECODABET[source[srcOffset + 3]])); + byte decode2 = DECODABET[source[srcOffset++]]; + byte decode3 = DECODABET[source[srcOffset]]; + if (decode2 < 0 || decode3 < 0) return -1; - } //e nd catch + int outBuff = (decode0 << 18) + | (decode1 << 12) + | (decode2 << 6) + | decode3; + destination[destOffset++] = (byte) (outBuff >> 16); + destination[destOffset++] = (byte) (outBuff >> 8); + destination[destOffset] = (byte) (outBuff); + return 3; } } // end decodeToBytes /** * Decodes data from Base64 notation. + * As of 0.9.14, this uses the I2P alphabet, so it is not "standard". * * @param s the string to decode * @return the decoded data, null on error * @since 1.4 */ private static byte[] standardDecode(String s) { - byte[] bytes = new byte[s.length()]; - for (int i = 0; i < bytes.length; i++) - bytes[i] = (byte)(s.charAt(i) & 0xFF); + // We use getUTF8() instead of getASCII() so we may verify + // there's no UTF-8 in there. + byte[] bytes = DataHelper.getUTF8(s); + if (bytes.length != s.length()) + return null; return decode(bytes, 0, bytes.length); } // end decode @@ -678,21 +711,43 @@ public class Base64 { * returns it as a string. * Equivlaent to calling * <code>new String( decode( s ) )</code> - * WARNING this uses the locale's encoding, it may not be what you want. + * + * As of 0.9.14, decodes as UTF-8. Prior to that, it used the platform's encoding. + * For best results, decoded data should be 7 bit. + * + * As of 0.9.14, does not require trailing '=' if remaining bits are zero. + * Prior to that, trailing 1, 2, or 3 chars were ignored. + * + * As of 0.9.14, trailing garbage after an '=' will cause an error. + * Prior to that, it was ignored. + * + * As of 0.9.14, whitespace will cause an error. + * Prior to that, it was ignored. * * @param s the strind to decode - * @return The data as a string + * @return The data as a string, or null on error * @since 1.4 - * @throws NPE on error? */ public static String decodeToString(String s) { - return new String(decode(s)); + byte[] b = decode(s); + if (b == null) + return null; + return DataHelper.getUTF8(b); } // end decodeToString /** * Decodes Base64 content in byte array format and returns * the decoded byte array. * + * As of 0.9.14, does not require trailing '=' if remaining bits are zero. + * Prior to that, trailing 1, 2, or 3 chars were ignored. + * + * As of 0.9.14, trailing garbage after an '=' will cause an error. + * Prior to that, it was ignored. + * + * As of 0.9.14, whitespace will cause an error. + * Prior to that, it was ignored. + * * @param source The Base64 encoded data * @param off The offset of where to begin decoding * @param len The length of characters to decode @@ -701,39 +756,47 @@ public class Base64 { */ private static byte[] decode(byte[] source, int off, int len) { int len34 = len * 3 / 4; - byte[] outBuff = new byte[len34]; // Upper limit on size of output + byte[] outBuff = new byte[len34]; // size of output int outBuffPosn = 0; - byte[] b4 = new byte[4]; - int b4Posn = 0; - int i = 0; - byte sbiCrop = 0; - byte sbiDecode = 0; - for (i = 0; i < len; i++) { - sbiCrop = (byte) (source[i] & 0x7f); // Only the low seven bits - sbiDecode = DECODABET[sbiCrop]; - - if (sbiDecode >= WHITE_SPACE_ENC) // White space, Equals sign or better - { - if (sbiDecode >= EQUALS_SIGN_ENC) { - b4[b4Posn++] = sbiCrop; - if (b4Posn > 3) { - outBuffPosn += decode4to3(b4, 0, outBuff, outBuffPosn); - b4Posn = 0; - - // If that was the equals sign, break out of 'for' loop - if (sbiCrop == EQUALS_SIGN) break; - } // end if: quartet built - - } // end if: equals sign or better - - } // end if: white space, equals sign or better - else { - //_log.warn("Bad Base64 input character at " + i + ": " + source[i] + "(decimal)"); + int i = off; + int end = off + len; + int converted = 0; + while (i + 3 < end) { + converted = decode4to3(source, i, outBuff, outBuffPosn); + if (converted < 0) + return null; + outBuffPosn += converted; + i += 4; + if (converted < 3) + break; + } + + // process any remaining without '=' + int remaining = end - i; + if (remaining > 0) { + if (converted > 0 && converted < 3) return null; - } // end else: - } // each input character + if (remaining == 1 || remaining > 3) + return null; + byte[] b4 = new byte[4]; + b4[0] = source[i++]; + b4[1] = source[i++]; + if (remaining == 3) + b4[2] = source[i]; + else + b4[2] = EQUALS_SIGN; + b4[3] = EQUALS_SIGN; + converted = decode4to3(b4, 0, outBuff, outBuffPosn); + if (converted < 0) + return null; + outBuffPosn += converted; + } + // don't copy unless we have to + if (outBuffPosn == outBuff.length) + return outBuff; + // and we shouldn't ever... would have returned null before byte[] out = new byte[outBuffPosn]; System.arraycopy(outBuff, 0, out, 0, outBuffPosn); return out;