Skip to content

Commit

Permalink
ARROW-3923: [Java] JDBC Time Fetches Without Timezone
Browse files Browse the repository at this point in the history
https://issues.apache.org/jira/browse/ARROW-3923

Hello!  I was reading through the JDBC source code and I noticed that a java.util.Calendar was required for creating an Arrow Schema and Arrow Vectors from a JDBC ResultSet, when none is required.

This change makes the Calendar optional.

Unit Tests:
The existing SureFire plugin configuration uses a UTC calendar for the database, which is the default Calendar in the existing code.  Likewise, no changes to the unit tests are required to provide adequate coverage for the change.

Author: Michael Pigott <mikepigott@users.noreply.github.com>
Author: Mike Pigott <mpigott@gmail.com>

Closes #3066 from mikepigott/jdbc-timestamp-no-calendar and squashes the following commits:

4d95da0 <Mike Pigott> ARROW-3923: Supporting a null Calendar in the config, and reverting the breaking change.
cd9a230 <Mike Pigott> Merge branch 'master' into jdbc-timestamp-no-calendar
509a1cc <Michael Pigott> Merge pull request #5 from apache/master
789c8c8 <Michael Pigott> Merge pull request #4 from apache/master
e5b19ee <Michael Pigott> Merge pull request #3 from apache/master
3b17c29 <Michael Pigott> Merge pull request #2 from apache/master
881c6c8 <Michael Pigott> Merge pull request #1 from apache/master
089cff4 <Mike Pigott> Format fixes
a58a4a5 <Mike Pigott> Fixing calendar usage.
e12832a <Mike Pigott> Allowing for timestamps without a time zone.
  • Loading branch information
mikepigott authored and xhochy committed Feb 5, 2019
1 parent 0c55b25 commit c93e2ae
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 28 deletions.
Expand Up @@ -179,13 +179,12 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
*
* @param resultSet ResultSet to use to fetch the data from underlying database
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets.
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException on error
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
}
Expand All @@ -195,15 +194,14 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
*
* @param resultSet ResultSet to use to fetch the data from underlying database
* @param allocator Memory allocator to use.
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets.
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException on error
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
}
Expand Down
Expand Up @@ -48,15 +48,15 @@ public final class JdbcToArrowConfig {
*/
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

this.allocator = allocator;
this.calendar = calendar;
}

/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving time-based fields from the database.
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
* @return the calendar.
*/
public Calendar getCalendar() {
Expand Down
Expand Up @@ -32,7 +32,7 @@ public class JdbcToArrowConfigBuilder {

/**
* Default constructor for the <code>JdbcToArrowConfigBuilder}</code>.
* Use the setter methods for the allocator and calendar; both must be
* Use the setter methods for the allocator and calendar; the allocator must be
* set. Otherwise, {@link #build()} will throw a {@link NullPointerException}.
*/
public JdbcToArrowConfigBuilder() {
Expand All @@ -41,9 +41,9 @@ public JdbcToArrowConfigBuilder() {
}

/**
* Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the
* allocator and calendar are required. A {@link NullPointerException}
* will be thrown if one of the arguments is <code>null</code>.
* Constructor for the <code>JdbcToArrowConfigBuilder</code>. The
* allocator is required, and a {@link NullPointerException}
* will be thrown if it is <code>null</code>.
* <p>
* The allocator is used to construct Arrow vectors from the JDBC ResultSet.
* The calendar is used to determine the time zone of {@link java.sql.Timestamp}
Expand All @@ -59,7 +59,6 @@ public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) {
this();

Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

this.allocator = allocator;
this.calendar = calendar;
Expand All @@ -82,10 +81,8 @@ public JdbcToArrowConfigBuilder setAllocator(BaseAllocator allocator) {
* Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>.
*
* @param calendar the calendar to set.
* @exception NullPointerExeption if <code>calendar</code> is <code>null</code>.
*/
public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) {
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
this.calendar = calendar;
return this;
}
Expand Down
Expand Up @@ -240,15 +240,15 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
*
* @param rs ResultSet to use to fetch the data from underlying database
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @param calendar The calendar to use when reading time-based data.
* @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
* @throws SQLException on error
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
throws SQLException, IOException {

Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(root, "Vector Schema cannot be null");
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");

jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
}
Expand All @@ -274,6 +274,8 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT

allocateVectors(root, DEFAULT_BUFFER_SIZE);

final Calendar calendar = config.getCalendar();

int rowCount = 0;
while (rs.next()) {
for (int i = 1; i <= columnCount; i++) {
Expand Down Expand Up @@ -324,17 +326,35 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT
rs.getString(i), !rs.wasNull(), rowCount);
break;
case Types.DATE:
updateVector((DateMilliVector) root.getVector(columnName),
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount);
final Date date;
if (calendar != null) {
date = rs.getDate(i, calendar);
} else {
date = rs.getDate(i);
}

updateVector((DateMilliVector) root.getVector(columnName), date, !rs.wasNull(), rowCount);
break;
case Types.TIME:
updateVector((TimeMilliVector) root.getVector(columnName),
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount);
final Time time;
if (calendar != null) {
time = rs.getTime(i, calendar);
} else {
time = rs.getTime(i);
}

updateVector((TimeMilliVector) root.getVector(columnName), time, !rs.wasNull(), rowCount);
break;
case Types.TIMESTAMP:
final Timestamp ts;
if (calendar != null) {
ts = rs.getTimestamp(i, calendar);
} else {
ts = rs.getTimestamp(i);
}

// TODO: Need to handle precision such as milli, micro, nano
updateVector((TimeStampVector) root.getVector(columnName),
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount);
updateVector((TimeStampVector) root.getVector(columnName), ts, !rs.wasNull(), rowCount);
break;
case Types.BINARY:
case Types.VARBINARY:
Expand Down
Expand Up @@ -42,14 +42,16 @@ public void testBuilderNullArguments() {
new JdbcToArrowConfigBuilder(null, null);
}

@Test(expected = NullPointerException.class)
public void testConfigNullCalendar() {
new JdbcToArrowConfig(allocator, null);
JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null);
assertNull(config.getCalendar());
}

@Test(expected = NullPointerException.class)
@Test
public void testBuilderNullCalendar() {
new JdbcToArrowConfigBuilder(allocator, null);
JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, null);
JdbcToArrowConfig config = builder.build();
assertNull(config.getCalendar());
}

@Test(expected = NullPointerException.class)
Expand All @@ -68,10 +70,11 @@ public void testSetNullAllocator() {
builder.setAllocator(null);
}

@Test(expected = NullPointerException.class)
@Test
public void testSetNullCalendar() {
JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar);
builder.setCalendar(null);
JdbcToArrowConfig config = builder.setCalendar(null).build();
assertNull(config.getCalendar());
}

@Test
Expand Down

0 comments on commit c93e2ae

Please sign in to comment.