diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/DefaultMetadataPolicy.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/DefaultMetadataPolicy.java
new file mode 100644
index 000000000000..c5abfc1bef28
--- /dev/null
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/DefaultMetadataPolicy.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.hive;
+
+/**
+ * Controls which Iceberg schema default metadata is embedded when converting Hive types.
+ *
+ *
ORC tables use {@link #WRITE_DEFAULT} because initial-default is applied via
+ * {@code updateColumnDefault} at the schema-update layer. Other file formats use
+ * {@link #WRITE_AND_INITIAL_DEFAULT} for V3 column defaults.
+ */
+public enum DefaultMetadataPolicy {
+ WRITE_DEFAULT,
+ WRITE_AND_INITIAL_DEFAULT
+}
diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
index 1b743343163e..716563caccba 100644
--- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaConverter.java
@@ -48,21 +48,33 @@ class HiveSchemaConverter {
private int id;
private final boolean autoConvert;
+ private final DefaultMetadataPolicy defaultPolicy;
- private HiveSchemaConverter(boolean autoConvert) {
+ private HiveSchemaConverter(boolean autoConvert, DefaultMetadataPolicy defaultPolicy) {
this.autoConvert = autoConvert;
+ this.defaultPolicy = defaultPolicy;
// Iceberg starts field id assignment from 1.
this.id = 1;
}
static Schema convert(List names, List typeInfos, List comments, boolean autoConvert,
Map defaultValues) {
- HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert);
+ return convert(names, typeInfos, comments, autoConvert, defaultValues, DefaultMetadataPolicy.WRITE_DEFAULT);
+ }
+
+ static Schema convert(List names, List typeInfos, List comments, boolean autoConvert,
+ Map defaultValues, DefaultMetadataPolicy defaultPolicy) {
+ HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert, defaultPolicy);
return new Schema(converter.convertInternal(names, typeInfos, defaultValues, comments));
}
public static Type convert(TypeInfo typeInfo, boolean autoConvert, String defaultValue) {
- HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert);
+ return convert(typeInfo, autoConvert, defaultValue, DefaultMetadataPolicy.WRITE_DEFAULT);
+ }
+
+ public static Type convert(
+ TypeInfo typeInfo, boolean autoConvert, String defaultValue, DefaultMetadataPolicy defaultPolicy) {
+ HiveSchemaConverter converter = new HiveSchemaConverter(autoConvert, defaultPolicy);
return converter.convertType(typeInfo, defaultValue);
}
@@ -86,7 +98,7 @@ List convertInternal(List names, List typeI
if (type.isPrimitiveType()) {
Object icebergDefaultValue = HiveSchemaUtil.getDefaultValue(defaultValues.get(columnName), type);
if (icebergDefaultValue != null) {
- fieldBuilder.withWriteDefault(Expressions.lit(icebergDefaultValue));
+ applyDefaultMetadata(fieldBuilder, icebergDefaultValue);
}
} else if (!type.isStructType()) {
throw new UnsupportedOperationException(
@@ -99,6 +111,14 @@ List convertInternal(List names, List typeI
return result;
}
+ private void applyDefaultMetadata(Types.NestedField.Builder fieldBuilder, Object icebergDefaultValue) {
+ org.apache.iceberg.expressions.Literal literal = Expressions.lit(icebergDefaultValue);
+ fieldBuilder.withWriteDefault(literal);
+ if (defaultPolicy == DefaultMetadataPolicy.WRITE_AND_INITIAL_DEFAULT) {
+ fieldBuilder.withInitialDefault(literal);
+ }
+ }
+
Type convertType(TypeInfo typeInfo, String defaultValue) {
switch (typeInfo.getCategory()) {
case PRIMITIVE:
diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java
index b1563040bcb9..443b5804852c 100644
--- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java
+++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java
@@ -154,6 +154,10 @@ public static Type convert(TypeInfo typeInfo, String defaultValue) {
return HiveSchemaConverter.convert(typeInfo, false, defaultValue);
}
+ public static Type convert(TypeInfo typeInfo, String defaultValue, DefaultMetadataPolicy defaultPolicy) {
+ return HiveSchemaConverter.convert(typeInfo, false, defaultValue, defaultPolicy);
+ }
+
/**
* Returns a SchemaDifference containing those fields which are present in only one of the collections, as well as
* those fields which are present in both (in terms of the name) but their type or comment has changed.
@@ -429,6 +433,24 @@ public static void setDefaultValues(Record record, List missi
}
}
+ /**
+ * Backfills struct column that is null on read using nested {@code initialDefault} metadata.
+ * This applies to rows written before {@code ADD COLUMNS} added the struct.
+ * Spec allows struct defaults as {@code {}} (see https://iceberg.apache.org/spec/#default-values), but
+ * {@code UpdateSchema} add column only supports primitives today;
+ * if empty structs are allowed, this backfill can be removed.
+ */
+ public static void backfillStructInitialDefaults(Record record, List columns) {
+ for (Types.NestedField field : columns) {
+ if (field.type().isStructType() && record.getField(field.name()) == null) {
+ Record nestedRecord = buildStructWithInitialDefaults(field.type().asStructType());
+ if (nestedRecord != null) {
+ record.setField(field.name(), nestedRecord);
+ }
+ }
+ }
+ }
+
/**
* Recursively builds a struct populated with write defaults.
* * @return A populated Record, or null if no nested fields have defaults.
@@ -458,6 +480,45 @@ private static Record buildStructWithDefaults(Types.StructType structType) {
return hasAnyDefault ? nestedRecord : null;
}
+ private static Record buildStructWithInitialDefaults(Types.StructType structType) {
+ Record nestedRecord = GenericRecord.create(structType);
+ boolean hasAnyDefault = false;
+
+ for (Types.NestedField field : structType.fields()) {
+ if (field.initialDefault() != null) {
+ Object defaultValue = convertToWriteType(field.initialDefault(), field.type());
+ nestedRecord.setField(field.name(), defaultValue);
+ hasAnyDefault = true;
+ } else if (field.type().isStructType()) {
+ Record deeperRecord = buildStructWithInitialDefaults(field.type().asStructType());
+ if (deeperRecord != null) {
+ nestedRecord.setField(field.name(), deeperRecord);
+ hasAnyDefault = true;
+ }
+ }
+ }
+
+ return hasAnyDefault ? nestedRecord : null;
+ }
+
+ /**
+ * Builds a map of nested field names to their initial-default values for a struct column.
+ *
+ * @return field-name to initial-default map, or empty if no nested field has an initial-default
+ */
+ public static Map getStructInitialDefaults(Types.StructType structType) {
+ Map result = Maps.newHashMap();
+ for (Types.NestedField field : structType.fields()) {
+ if (field.initialDefault() != null) {
+ result.put(field.name(), field.initialDefault());
+ } else if (field.type().isStructType()) {
+ Map nested = getStructInitialDefaults(field.type().asStructType());
+ result.put(field.name(), nested);
+ }
+ }
+ return result;
+ }
+
/**
* Sets a value into a {@link Record} using a struct-only field path (top-level column or nested
* through structs). Intermediate struct records are created as needed.
@@ -496,21 +557,6 @@ private static Record getOrCreateStructRecord(
return record;
}
- // Special method for nested structs that always applies defaults to null fields
- private static void setDefaultValuesForNestedStruct(Record record, List fields) {
- for (Types.NestedField field : fields) {
- Object fieldValue = record.getField(field.name());
-
- if (field.writeDefault() != null) {
- Object defaultValue = convertToWriteType(field.writeDefault(), field.type());
- record.setField(field.name(), defaultValue);
- } else if (field.type().isStructType()) {
- // Recursively process nested structs
- setDefaultValuesForNestedStruct((Record) fieldValue, field.type().asStructType().fields());
- }
- }
- }
-
public static Object convertToWriteType(Object value, Type type) {
if (value == null) {
return null;
diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java
index 6daf3aeca5d7..e0373374033e 100644
--- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java
+++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java
@@ -28,6 +28,9 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.iceberg.Schema;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Type;
@@ -240,4 +243,37 @@ private void assertEquals(Type expected, Type actual) {
}
}
}
+
+ @Test
+ public void testBackfillStructInitialDefaults() {
+ Schema schema = new Schema(
+ optional(1, "id", Types.IntegerType.get()),
+ optional(2, "point", Types.StructType.of(
+ Types.NestedField.builder()
+ .withId(3)
+ .withName("x")
+ .asOptional()
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Expressions.lit(100))
+ .build(),
+ Types.NestedField.builder()
+ .withId(4)
+ .withName("y")
+ .asOptional()
+ .ofType(Types.IntegerType.get())
+ .withInitialDefault(Expressions.lit(99))
+ .build()
+ ))
+ );
+
+ Record record = GenericRecord.create(schema);
+ record.setField("id", 1);
+
+ HiveSchemaUtil.backfillStructInitialDefaults(record, schema.columns());
+
+ Record point = (Record) record.getField("point");
+ assertThat(point).isNotNull();
+ assertThat(point.getField("x")).isEqualTo(100);
+ assertThat(point.getField("y")).isEqualTo(99);
+ }
}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java
index 917829ac304d..3b0742a16931 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/BaseHiveIcebergMetaHook.java
@@ -60,6 +60,7 @@
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.hive.DefaultMetadataPolicy;
import org.apache.iceberg.hive.HMSTablePropertyHelper;
import org.apache.iceberg.hive.HiveOperationsBase;
import org.apache.iceberg.hive.HiveSchemaUtil;
@@ -490,6 +491,13 @@ static boolean isOrcFileFormat(org.apache.hadoop.hive.metastore.api.Table hmsTab
.equalsIgnoreCase(hmsTable.getParameters().get(TableProperties.DEFAULT_FILE_FORMAT));
}
+ protected static DefaultMetadataPolicy defaultMetadataPolicy(
+ org.apache.hadoop.hive.metastore.api.Table hmsTable) {
+ return isOrcFileFormat(hmsTable) ?
+ DefaultMetadataPolicy.WRITE_DEFAULT :
+ DefaultMetadataPolicy.WRITE_AND_INITIAL_DEFAULT;
+ }
+
protected void setWriteModeDefaults(Table icebergTbl, Map newProps, EnvironmentContext context) {
if ((icebergTbl == null || TableUtil.formatVersion(icebergTbl) == 1) &&
IcebergTableUtil.isV2TableOrAbove(newProps)) {
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 5a2bbbf70219..b3c931d37503 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -114,6 +114,7 @@
import org.apache.iceberg.expressions.UnboundPredicate;
import org.apache.iceberg.expressions.UnboundTerm;
import org.apache.iceberg.hive.CachedClientPool;
+import org.apache.iceberg.hive.DefaultMetadataPolicy;
import org.apache.iceberg.hive.HiveLock;
import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.hive.HiveTableOperations;
@@ -741,16 +742,21 @@ private void handleAddColumns(org.apache.hadoop.hive.metastore.api.Table hmsTabl
(List) SessionStateUtil.getResource(conf, SessionStateUtil.COLUMN_DEFAULTS).orElse(null);
Map defaultValues = Stream.ofNullable(sqlDefaultConstraints).flatMap(Collection::stream)
.collect(Collectors.toMap(SQLDefaultConstraint::getColumn_name, SQLDefaultConstraint::getDefault_value));
- boolean isORc = isOrcFileFormat(hmsTable);
+ DefaultMetadataPolicy defaultPolicy = defaultMetadataPolicy(hmsTable);
for (FieldSchema addedCol : addedCols) {
String defaultValue = defaultValues.get(addedCol.getName());
- Type type = HiveSchemaUtil.convert(TypeInfoUtils.getTypeInfoFromTypeString(addedCol.getType()), defaultValue);
+ Type type =
+ HiveSchemaUtil.convert(
+ TypeInfoUtils.getTypeInfoFromTypeString(addedCol.getType()), defaultValue, defaultPolicy);
Literal defaultVal = Optional.ofNullable(defaultValue).filter(v -> !type.isStructType())
.map(v -> Expressions.lit(HiveSchemaUtil.getDefaultValue(v, type))).orElse(null);
- // ORC doesn't have support for initialDefault from iceberg layer, we only need to set default for writeDefault.
- updateSchema.addColumn(addedCol.getName(), type, addedCol.getComment(), isORc ? null : defaultVal);
- if (isORc && defaultVal != null) {
+ updateSchema.addColumn(
+ addedCol.getName(),
+ type,
+ addedCol.getComment(),
+ defaultPolicy == DefaultMetadataPolicy.WRITE_AND_INITIAL_DEFAULT ? defaultVal : null);
+ if (defaultPolicy == DefaultMetadataPolicy.WRITE_DEFAULT && defaultVal != null) {
updateSchema.updateColumnDefault(addedCol.getName(), defaultVal);
}
}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
index 834d762062da..23457c8676cf 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
@@ -52,6 +52,7 @@
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.mr.InputFormatConfig;
@@ -187,6 +188,10 @@ static Map getInitialColumnDefaults(List colu
for (Types.NestedField column : columns) {
if (column.initialDefault() != null) {
columnDefaults.put(column.name(), column.initialDefault());
+ } else if (column.type().isStructType()) {
+ Map structDefaults =
+ HiveSchemaUtil.getStructInitialDefaults(column.type().asStructType());
+ columnDefaults.put(column.name(), structDefaults);
}
}
return columnDefaults;
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergRecordReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergRecordReader.java
index e3d03a6bee66..26c715883340 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergRecordReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergRecordReader.java
@@ -53,6 +53,7 @@
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.encryption.EncryptedFiles;
import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.InputFile;
@@ -173,7 +174,16 @@ private CloseableIterable openGeneric(FileScanTask task, Schema readSchema) {
default -> throw new UnsupportedOperationException(
String.format("Cannot read %s file: %s", file.format().name(), file.location()));
};
- return applyResidualFiltering(iterable, residual, readSchema);
+ return applyResidualFiltering(withStructInitialDefaultBackfill(iterable, readSchema), residual, readSchema);
+ }
+
+ private CloseableIterable withStructInitialDefaultBackfill(CloseableIterable iterable, Schema readSchema) {
+ return CloseableIterable.transform(iterable, record -> {
+ if (record instanceof Record) {
+ HiveSchemaUtil.backfillStructInitialDefaults((Record) record, readSchema.columns());
+ }
+ return record;
+ });
}
private CloseableIterable newAvroIterable(
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_initial_default.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_initial_default.q
index c0c058bcd5dc..c7007114e5be 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_initial_default.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_initial_default.q
@@ -13,7 +13,14 @@ ALTER TABLE ice_parq ADD COLUMNS (point STRUCT DEFAULT '{"x":100,"
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general');
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}');
INSERT INTO ice_parq (id) VALUES (2);
@@ -45,7 +52,14 @@ ALTER TABLE ice_avro ADD COLUMNS (point STRUCT DEFAULT '{"x":100,"
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general');
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}');
INSERT INTO ice_avro (id) VALUES (2);
@@ -77,7 +91,14 @@ ALTER TABLE ice_orc ADD COLUMNS (point STRUCT DEFAULT '{"x":100,"y
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general');
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}');
INSERT INTO ice_orc (id) VALUES (2);
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_initial_default.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_initial_default.q.out
index da857354d811..d15997e7d81f 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_initial_default.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_initial_default.q.out
@@ -28,7 +28,14 @@ PREHOOK: query: ALTER TABLE ice_parq ADD COLUMNS (point STRUCT DEF
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@ice_parq
PREHOOK: Output: default@ice_parq
@@ -40,7 +47,14 @@ POSTHOOK: query: ALTER TABLE ice_parq ADD COLUMNS (point STRUCT DE
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@ice_parq
POSTHOOK: Output: default@ice_parq
@@ -60,8 +74,8 @@ POSTHOOK: query: SELECT * FROM ice_parq ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_parq
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: ALTER TABLE ice_parq CHANGE COLUMN point point STRUCT DEFAULT '{"x":100,"y":88}'
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: default@ice_parq
@@ -94,9 +108,9 @@ POSTHOOK: query: SELECT * FROM ice_parq ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_parq
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: CREATE TABLE ice_avro (
id INT)
STORED BY ICEBERG stored as avro
@@ -127,7 +141,14 @@ PREHOOK: query: ALTER TABLE ice_avro ADD COLUMNS (point STRUCT DEF
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@ice_avro
PREHOOK: Output: default@ice_avro
@@ -139,7 +160,14 @@ POSTHOOK: query: ALTER TABLE ice_avro ADD COLUMNS (point STRUCT DE
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@ice_avro
POSTHOOK: Output: default@ice_avro
@@ -159,8 +187,8 @@ POSTHOOK: query: SELECT * FROM ice_avro ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_avro
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: ALTER TABLE ice_avro CHANGE COLUMN point point STRUCT DEFAULT '{"x":100,"y":88}'
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: default@ice_avro
@@ -193,9 +221,9 @@ POSTHOOK: query: SELECT * FROM ice_avro ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_avro
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: CREATE TABLE ice_orc (
id INT)
STORED BY ICEBERG stored as orc
@@ -226,7 +254,14 @@ PREHOOK: query: ALTER TABLE ice_orc ADD COLUMNS (point STRUCT DEFA
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@ice_orc
PREHOOK: Output: default@ice_orc
@@ -238,7 +273,14 @@ POSTHOOK: query: ALTER TABLE ice_orc ADD COLUMNS (point STRUCT DEF
created_date DATE DEFAULT '2024-01-01',
created_ts TIMESTAMP DEFAULT '2024-01-01T10:00:00',
score DECIMAL(5,2) DEFAULT 100.00,
- category STRING DEFAULT 'general')
+ category STRING DEFAULT 'general',
+ person STRUCT<
+ name: STRING,
+ address: STRUCT<
+ street: STRING,
+ city: STRING
+ >
+ > DEFAULT '{"name":"John","address":{"street":"Main St","city":"New York"}}')
POSTHOOK: type: ALTERTABLE_ADDCOLS
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: default@ice_orc
@@ -258,8 +300,8 @@ POSTHOOK: query: SELECT * FROM ice_orc ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: ALTER TABLE ice_orc CHANGE COLUMN point point STRUCT DEFAULT '{"x":100,"y":88}'
PREHOOK: type: ALTERTABLE_RENAMECOL
PREHOOK: Input: default@ice_orc
@@ -292,9 +334,9 @@ POSTHOOK: query: SELECT * FROM ice_orc ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: SELECT * FROM ice_parq ORDER BY id
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_parq
@@ -303,9 +345,9 @@ POSTHOOK: query: SELECT * FROM ice_parq ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_parq
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: SELECT * FROM ice_avro ORDER BY id
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_avro
@@ -314,9 +356,9 @@ POSTHOOK: query: SELECT * FROM ice_avro ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_avro
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
PREHOOK: query: SELECT * FROM ice_orc ORDER BY id
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_orc
@@ -325,6 +367,6 @@ POSTHOOK: query: SELECT * FROM ice_orc ORDER BY id
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 NULL NULL NULL NULL NULL NULL NULL NULL NULL
-2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
-3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general
+1 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
+2 {"x":100,"y":99} unknown 25 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
+3 {"x":100,"y":88} unknown 21 50000.0 true 2024-01-01 2024-01-01 10:00:00 100.00 general {"name":"John","address":{"street":"Main St","city":"New York"}}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java
index e8d95ccd1587..a31b0e69858e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java
@@ -24,14 +24,18 @@
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
/**
* A dummy vectorized parquet reader used for schema evolution.
@@ -64,6 +68,41 @@ public void readBatch(int total, ColumnVector col, TypeInfo typeInfo) throws IOE
if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
fillPrimitive(col, (PrimitiveTypeInfo) typeInfo, defaultValue);
+ } else if (typeInfo.getCategory() == ObjectInspector.Category.STRUCT) {
+ fillStruct(col, (StructTypeInfo) typeInfo, defaultValue);
+ } else {
+ throw new IOException("Unsupported type category in DummyColumnReader: " + typeInfo.getCategory());
+ }
+ }
+
+ private void fillStruct(ColumnVector col, StructTypeInfo structTypeInfo, Object defaultValue) throws IOException {
+ StructColumnVector structCol = (StructColumnVector) col;
+ List fieldNames = structTypeInfo.getAllStructFieldNames();
+ List fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
+ Map fieldDefaults = defaultValue instanceof Map ? (Map) defaultValue : null;
+
+ for (int i = 0; i < fieldNames.size(); i++) {
+ Object fieldDefault = fieldDefaults != null ? fieldDefaults.get(fieldNames.get(i)) : null;
+ fillStructField(structCol.fields[i], fieldTypes.get(i), fieldDefault);
+ }
+ }
+
+ private void fillStructField(ColumnVector col, TypeInfo typeInfo, Object fieldDefault) throws IOException {
+ if (fieldDefault == null) {
+ col.isRepeating = true;
+ Arrays.fill(col.isNull, true);
+ col.noNulls = false;
+ return;
+ }
+
+ col.isRepeating = true;
+ col.noNulls = true;
+ col.isNull[0] = false;
+
+ if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
+ fillPrimitive(col, (PrimitiveTypeInfo) typeInfo, fieldDefault);
+ } else if (typeInfo.getCategory() == ObjectInspector.Category.STRUCT) {
+ fillStruct(col, (StructTypeInfo) typeInfo, fieldDefault);
} else {
throw new IOException("Unsupported type category in DummyColumnReader: " + typeInfo.getCategory());
}