Skip to content

Commit e810185

Browse files
committed
Use new parseHiveDate for OpenX reader to remove any characters after yyyy-mm-dd
1 parent 5fe5d3b commit e810185

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

lib/trino-hive-formats/src/main/java/io/trino/hive/formats/HiveFormatUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ public final class HiveFormatUtils
7171
private static final char TIMESTAMP_FORMATS_SEPARATOR = ',';
7272
private static final char TIMESTAMP_FORMATS_ESCAPE = '\\';
7373

74-
private static final DateTimeFormatter DATE_PARSER = new DateTimeFormatterBuilder()
74+
public static final DateTimeFormatter DATE_PARSER = new DateTimeFormatterBuilder()
7575
.parseLenient()
7676
.appendValue(YEAR, 1, 10, SignStyle.NORMAL)
7777
.appendLiteral('-')

lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/openxjson/OpenXJsonDeserializer.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import java.io.IOException;
4444
import java.math.BigDecimal;
4545
import java.math.RoundingMode;
46+
import java.time.LocalDate;
4647
import java.time.ZoneOffset;
4748
import java.time.ZonedDateTime;
4849
import java.time.format.DateTimeFormatter;
@@ -62,7 +63,7 @@
6263

6364
import static com.google.common.base.Preconditions.checkArgument;
6465
import static com.google.common.collect.ImmutableList.toImmutableList;
65-
import static io.trino.hive.formats.HiveFormatUtils.parseHiveDate;
66+
import static io.trino.hive.formats.HiveFormatUtils.DATE_PARSER;
6667
import static io.trino.hive.formats.HiveFormatUtils.parseHiveTimestamp;
6768
import static io.trino.hive.formats.HiveFormatUtils.scaleDecimal;
6869
import static io.trino.hive.formats.line.openxjson.JsonWriter.canonicalizeJsonString;
@@ -899,6 +900,32 @@ public static long parseDecimalHexOctalLong(String stringValue)
899900
return Long.parseLong(stringValue);
900901
}
901902

903+
/*
904+
* The parseHiveDate method in HiveFormatUtils.java that the native OpenX reader was using only supported
905+
* a space delimiter to remove any characters after 'yyyy-mm-dd'. As a result, while '2025-01-04 00:00:00.000Z'
906+
* was correctly parsed as '2025-01-04', strings like '2025-01-04T00:00:00.000Z' or '2025-01-04AA00:00:00.000Z'
907+
* were throwing exceptions and being parsed as null.
908+
* This new parseHiveDate method removes any characters after 'yyyy-mm-dd', regardless of the delimiter.
909+
*/
910+
private static LocalDate parseHiveDate(String value)
911+
{
912+
value = value.trim();
913+
int length = value.length();
914+
int endIndex = length;
915+
916+
// Find the first character that isn't a digit or hyphen
917+
for (int i = 0; i < length; i++) {
918+
char c = value.charAt(i);
919+
if (c != '-' && (c < '0' || c > '9')) {
920+
endIndex = i;
921+
break;
922+
}
923+
}
924+
925+
String datePortion = value.substring(0, endIndex);
926+
return LocalDate.parse(datePortion, DATE_PARSER);
927+
}
928+
902929
private static boolean isHex(String s)
903930
{
904931
// This does not allow for `0x-123`

lib/trino-hive-formats/src/test/java/io/trino/hive/formats/line/openxjson/TestOpenxJsonFormat.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,11 @@ public void testDate()
885885
assertDate("\"1969-12-31\"", -1);
886886

887887
// Hive ignores everything after the first space
888-
assertDate("\"1986-01-01 anything is allowed here\"", LocalDate.of(1986, 1, 1).toEpochDay());
888+
long day = LocalDate.of(1986, 1, 1).toEpochDay();
889+
SqlDate sqlDate = new SqlDate((int) day);
890+
assertDate("\"1986-01-01 anything is allowed here\"", day);
891+
assertValueTrinoOnly(DATE, "\"1986-01-01T00:00:00.000Z\"", sqlDate);
892+
assertValueTrinoOnly(DATE, "\"1986-01-01AA00:00:00.000Z\"", sqlDate);
889893

890894
assertDate("\"1986-01-01\"", LocalDate.of(1986, 1, 1).toEpochDay());
891895
assertDate("\"1986-01-33\"", LocalDate.of(1986, 2, 2).toEpochDay());

0 commit comments

Comments
 (0)