Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import org.apache.parquet.Preconditions;
Expand Down Expand Up @@ -657,12 +658,12 @@ static String getString(ByteBuffer value) {
checkIndex(start + length - 1, value.limit());
if (value.hasArray()) {
// If the buffer is backed by an array, we can use the array directly.
return new String(value.array(), value.arrayOffset() + start, length);
return new String(value.array(), value.arrayOffset() + start, length, StandardCharsets.UTF_8);
} else {
// If the buffer is not backed by an array, we need to copy the bytes into a new array.
byte[] valueArray = new byte[length];
slice(value, start).get(valueArray);
return new String(valueArray);
return new String(valueArray, StandardCharsets.UTF_8);
}
}
throw unexpectedType(Variant.Type.STRING, value);
Expand Down Expand Up @@ -825,12 +826,16 @@ static String getMetadataKey(ByteBuffer metadata, int id) {
}
checkIndex(dataPos + nextOffset - 1, metadata.limit());
if (metadata.hasArray() && !metadata.isReadOnly()) {
return new String(metadata.array(), metadata.arrayOffset() + dataPos + offset, nextOffset - offset);
return new String(
metadata.array(),
metadata.arrayOffset() + dataPos + offset,
nextOffset - offset,
StandardCharsets.UTF_8);
} else {
// ByteBuffer does not have an array, so we need to use the `get` method to read the bytes.
byte[] metadataArray = new byte[nextOffset - offset];
slice(metadata, dataPos + offset).get(metadataArray);
return new String(metadataArray);
return new String(metadataArray, StandardCharsets.UTF_8);
}
}

Expand Down Expand Up @@ -861,13 +866,14 @@ static HashMap<String, Integer> getMetadataMap(ByteBuffer metadata) {
new String(
metadata.array(),
metadata.arrayOffset() + pos + stringStart + offset,
nextOffset - offset),
nextOffset - offset,
StandardCharsets.UTF_8),
id);
} else {
// ByteBuffer does not have an array, so we need to use the `get` method to read the bytes.
byte[] metadataArray = new byte[nextOffset - offset];
slice(metadata, pos + stringStart + offset).get(metadataArray);
result.put(new String(metadataArray), id);
result.put(new String(metadataArray, StandardCharsets.UTF_8), id);
}
offset = nextOffset;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -406,4 +406,29 @@ public void testMetadataWithNonZeroPositionReadOnly() {
Assert.assertEquals(0, immutableMetadata.getOrInsert("name"));
Assert.assertEquals(1, immutableMetadata.getOrInsert("age"));
}

@Test
public void testMetadataMapWithUnicodeKeys() {
    // Assemble an object variant so that its metadata dictionary holds two non-ASCII keys.
    VariantBuilder builder = new VariantBuilder();
    VariantObjectBuilder objectBuilder = builder.startObject();
    objectBuilder.appendKey("élève");
    objectBuilder.appendInt(1);
    objectBuilder.appendKey("中文");
    objectBuilder.appendInt(2);
    builder.endObject();

    ByteBuffer metadataBytes = builder.build().getMetadataBuffer();

    // First pass: the buffer exactly as handed out by the variant
    // (intended to exercise the array-backed path of the metadata decoding).
    ImmutableMetadata fromOriginalBuffer = new ImmutableMetadata(metadataBytes);
    Assert.assertEquals(0, fromOriginalBuffer.getOrInsert("élève"));
    Assert.assertEquals(1, fromOriginalBuffer.getOrInsert("中文"));

    // Second pass: a read-only view reports isReadOnly() == true, which is meant to
    // force the copy-based (else) path in getMetadataMap.
    ByteBuffer readOnlyView = metadataBytes.asReadOnlyBuffer();
    ImmutableMetadata fromReadOnlyBuffer = new ImmutableMetadata(readOnlyView);
    Assert.assertEquals(0, fromReadOnlyBuffer.getOrInsert("élève"));
    Assert.assertEquals(1, fromReadOnlyBuffer.getOrInsert("中文"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,15 @@ public void testParseUnicodeString() throws IOException {
Assert.assertEquals("\u00e9l\u00e8ve", v.getString());
}

@Test
public void testParseUnicodeKey() throws IOException {
    // The JSON text spells the object key with JSON backslash-u escapes; the parser has to
    // decode them into the actual characters when it records the key.
    Variant v = VariantJsonParser.parseJson("{\"\\u00e9l\\u00e8ve\": 42}");
    Assert.assertEquals(Variant.Type.OBJECT, v.getType());

    // The expected key is written with Java unicode escapes (same form as the string assertion
    // in testParseUnicodeString) so the test does not depend on the source-file encoding
    // that the compiler happens to be configured with.
    Variant value = v.getFieldByKey("\u00e9l\u00e8ve");
    Assert.assertNotNull(value);
    Assert.assertEquals(42, value.getInt());
}

@Test
public void testParseEscapedString() throws IOException {
Variant v = VariantJsonParser.parseJson("\"hello\\nworld\"");
Expand Down