Skip to content

Commit a1ce9d7

Browse files
committed
Clarified hexadecimal characters used in converting from characters to values to be strictly 0-9, a-f, and A-F.
Added a test to verify isHexDigit and fromHexDigit for the entire range of chars
1 parent 29f9bf7 commit a1ce9d7

File tree

2 files changed

+46
-22
lines changed

2 files changed

+46
-22
lines changed

src/java.base/share/classes/java/util/HexFormat.java

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@
5454
* methods include {@link #fromHexDigits(CharSequence) fromHexDigits(string)},
5555
* {@link #fromHexDigitsToLong(CharSequence) fromHexDigitsToLong(string)}, and
5656
* {@link #fromHexDigit(int) fromHexDigit(int)} converts a single character or codepoint.
57-
*
57+
* For conversions from hexadecimal characters, the digits and uppercase and lowercase
58+
* characters in {@code "0-9", "a-f", and "A-F"} are converted to corresponding values
59+
* {@code 0-15}.
5860
* <p>
5961
* For byte array to formatted hexadecimal string conversions
6062
* the {@code formatHex} methods include {@link #formatHex(byte[]) formatHex(byte[])}
@@ -143,7 +145,22 @@ public final class HexFormat {
143145
'0', '1', '2', '3', '4', '5', '6', '7',
144146
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
145147
};
146-
148+
// Analysis has shown that generating the whole array allows the JIT to generate
149+
// better code compared to a slimmed down array, such as one cutting off after 'f'
150+
private static final byte[] DIGITS = new byte[] {
151+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
152+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
153+
-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1,
154+
-1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
155+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12,
156+
13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
157+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
158+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
159+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
160+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
161+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
162+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
163+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
147164
/**
148165
* Format each byte of an array as a pair of hexadecimal digits.
149166
* The hexadecimal characters are from lowercase alpha digits.
@@ -829,32 +846,38 @@ private static int checkDigitCount(int fromIndex, int toIndex, int limit) {
829846

830847
/**
831848
* Returns {@code true} if the character is a valid hexadecimal character or codepoint.
832-
* A character is a valid hexadecimal character if
833-
* {@link Character#digit(int, int) Character.digit(int, 16)} returns
834-
* a positive value.
835-
*
849+
* The valid hexadecimal characters are:
850+
* <ul>
851+
* <li>{@code '0' ('\u005Cu0030')} through {@code '9' ('\u005Cu0039')} inclusive,
852+
* <li>{@code 'A' ('\u005Cu0041')} through {@code 'F' ('\u005Cu0046')} inclusive, and
853+
* <li>{@code 'a' ('\u005Cu0061')} through {@code 'f' ('\u005Cu0066')} inclusive.
854+
* </ul>
836855
* @param ch a codepoint
837856
* @return {@code true} if the character is a valid hexadecimal character,
838857
* otherwise {@code false}
839858
*/
840859
public boolean isHexDigit(int ch) {
841-
return Character.digit(ch, 16) >= 0;
860+
return ((ch >>> 8) == 0 && DIGITS[ch] >= 0);
842861
}
843862

844863
/**
845864
* Returns the value for the hexadecimal character or codepoint.
846-
* The characters {@code "0-9", "A-F", "a-f"} are parsed
847-
* using {@link Character#digit(int, int) Character.digit(int, 16)}.
848-
*
865+
* The value is:
866+
* <ul>
867+
* <li>{@code (ch - '0')} for {@code '0'} through {@code '9'} inclusive,
868+
* <li>{@code (ch - 'A' + 10)} for {@code 'A'} through {@code 'F'} inclusive, and
869+
* <li>{@code (ch - 'a' + 10)} for {@code 'a'} through {@code 'f'} inclusive.
870+
* </ul>
849871
* @param ch a character or codepoint
850-
* @return the value {@code 0..15}
872+
* @return the value {@code 0-15}
851873
* @throws NumberFormatException if the codepoint is not a hexadecimal character
852874
*/
853875
public int fromHexDigit(int ch) {
854-
int value = Character.digit(ch, 16);
855-
if (value < 0)
856-
throw new NumberFormatException("not a hexadecimal digit: \"" + (char)ch + "\" + " + ch);
857-
return value;
876+
int value;
877+
if ((ch >>> 8) == 0 && (value = DIGITS[ch]) >= 0) {
878+
return value;
879+
}
880+
throw new NumberFormatException("not a hexadecimal digit: \"" + (char) ch + "\" = " + ch);
858881
}
859882

860883
/**

test/jdk/java/util/HexFormat/HexFormatTest.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,14 @@ static void testFromHexDigit() {
184184
@Test
185185
static void testFromHexInvalid() {
186186
HexFormat hex = HexFormat.of();
187-
// An assortment of invalid characters
188-
String chars = "\u0000 /:\u0040G\u0060g\u007f";
189-
for (int i = 0; i < chars.length(); i++) {
190-
char ch = chars.charAt(i);
191-
Throwable ex = expectThrows(NumberFormatException.class,
192-
() -> hex.fromHexDigit(ch));
193-
System.out.println(ex);
187+
for (int i = 0; i < 65536; i++) {
188+
char ch = (char)i;
189+
if (ch > 0xff || Character.digit(ch, 16) < 0) {
190+
assertFalse(hex.isHexDigit(ch), "isHexDigit incorrect for '" + ch + "' = " + i);
191+
expectThrows(NumberFormatException.class,
192+
() -> hex.fromHexDigit(ch));
193+
194+
}
194195
}
195196
}
196197

0 commit comments

Comments
 (0)