Skip to content

Commit 2c0ee8e

Browse files
ajantha-bhat authored and jackylk committed
[CARBONDATA-3653] Support huge data for complex child columns
Why is this PR needed? Currently, string and binary child columns of complex types are stored with a short length. So, if the data is longer than 32000 characters, data load fails for binary and long string columns. What changes were proposed in this PR? Complex child columns of type string, binary, decimal and date are stored as a byte_array page with a short length; this is changed to an int length. [Separating just string and binary is hard for now; to be done in the future.] Compatibility is handled by introducing a new encoding type for complex child columns. Does this PR introduce any user interface change? No. Is any new testcase added? Yes. This closes apache#3562
1 parent 71a4cf4 commit 2c0ee8e

File tree

23 files changed

+229
-43
lines changed

23 files changed

+229
-43
lines changed

core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java

+16-7
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,8 @@ public byte[] getBooleanPage() {
698698
* @return
699699
* @throws IOException
700700
*/
701-
public abstract byte[] getComplexChildrenLVFlattenedBytePage() throws IOException;
701+
public abstract byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType)
702+
throws IOException;
702703

703704
/**
704705
* For complex type columns
@@ -746,7 +747,8 @@ public long getPageLengthInBytes() throws IOException {
746747
return getDecimalPage().length;
747748
} else if (dataType == BYTE_ARRAY
748749
&& columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
749-
return getComplexChildrenLVFlattenedBytePage().length;
750+
return getComplexChildrenLVFlattenedBytePage(
751+
columnPageEncoderMeta.getColumnSpec().getSchemaDataType()).length;
750752
} else if (dataType == BYTE_ARRAY
751753
&& (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT
752754
|| columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY
@@ -785,7 +787,8 @@ public byte[] compress(Compressor compressor) throws MemoryException, IOExceptio
785787
return compressor.compressByte(getDecimalPage());
786788
} else if (dataType == BYTE_ARRAY
787789
&& columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
788-
return compressor.compressByte(getComplexChildrenLVFlattenedBytePage());
790+
return compressor.compressByte(getComplexChildrenLVFlattenedBytePage(
791+
columnPageEncoderMeta.getColumnSpec().getSchemaDataType()));
789792
} else if (dataType == BYTE_ARRAY
790793
&& (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT
791794
|| columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY
@@ -805,8 +808,8 @@ public byte[] compress(Compressor compressor) throws MemoryException, IOExceptio
805808
* Decompress data and create a column page using the decompressed data,
806809
* except for decimal page
807810
*/
808-
public static ColumnPage decompress(ColumnPageEncoderMeta meta, byte[] compressedData,
809-
int offset, int length, boolean isLVEncoded)
811+
public static ColumnPage decompress(ColumnPageEncoderMeta meta, byte[] compressedData, int offset,
812+
int length, boolean isLVEncoded, boolean isComplexPrimitiveIntLengthEncoding)
810813
throws MemoryException {
811814
Compressor compressor = CompressorFactory.getInstance().getCompressor(meta.getCompressorName());
812815
TableSpec.ColumnSpec columnSpec = meta.getColumnSpec();
@@ -836,8 +839,14 @@ public static ColumnPage decompress(ColumnPageEncoderMeta meta, byte[] compresse
836839
columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE
837840
|| columnSpec.getColumnType() == ColumnType.PLAIN_VALUE)) {
838841
byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length);
839-
return newComplexLVBytesPage(columnSpec, lvVarBytes,
840-
CarbonCommonConstants.SHORT_SIZE_IN_BYTE, meta.getCompressorName());
842+
if (isComplexPrimitiveIntLengthEncoding) {
843+
// decode as int length
844+
return newComplexLVBytesPage(columnSpec, lvVarBytes,
845+
CarbonCommonConstants.INT_SIZE_IN_BYTE, meta.getCompressorName());
846+
} else {
847+
return newComplexLVBytesPage(columnSpec, lvVarBytes,
848+
CarbonCommonConstants.SHORT_SIZE_IN_BYTE, meta.getCompressorName());
849+
}
841850
} else if (isLVEncoded && storeDataType == BYTE_ARRAY &&
842851
columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
843852
byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length);

core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ public byte[] getLVFlattenedBytePage() throws IOException {
179179
}
180180

181181
@Override
182-
public byte[] getComplexChildrenLVFlattenedBytePage() {
182+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) {
183183
throw new UnsupportedOperationException("internal error");
184184
}
185185

core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.carbondata.core.localdictionary.PageLevelDictionary;
2929
import org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException;
3030
import org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator;
31+
import org.apache.carbondata.core.metadata.datatype.DataType;
3132

3233
import org.apache.log4j.Logger;
3334

@@ -364,11 +365,11 @@ public byte[] getLVFlattenedBytePage() throws IOException {
364365
}
365366

366367
@Override
367-
public byte[] getComplexChildrenLVFlattenedBytePage() throws IOException {
368+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) throws IOException {
368369
if (null != encodedDataColumnPage) {
369-
return encodedDataColumnPage.getComplexChildrenLVFlattenedBytePage();
370+
return encodedDataColumnPage.getComplexChildrenLVFlattenedBytePage(dataType);
370371
} else {
371-
return actualDataColumnPage.getComplexChildrenLVFlattenedBytePage();
372+
return actualDataColumnPage.getComplexChildrenLVFlattenedBytePage(dataType);
372373
}
373374
}
374375

core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ public byte[] getLVFlattenedBytePage() throws IOException {
296296
}
297297

298298
@Override
299-
public byte[] getComplexChildrenLVFlattenedBytePage() throws IOException {
299+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) throws IOException {
300300
ByteArrayOutputStream stream = new ByteArrayOutputStream();
301301
DataOutputStream out = new DataOutputStream(stream);
302302
for (int i = 0; i < arrayElementCount; i++) {

core/src/main/java/org/apache/carbondata/core/datastore/page/SafeVarLengthColumnPage.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import java.util.List;
2626

2727
import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta;
28+
import org.apache.carbondata.core.metadata.datatype.DataType;
29+
import org.apache.carbondata.core.util.DataTypeUtil;
2830

2931
public class SafeVarLengthColumnPage extends VarLengthColumnPageBase {
3032

@@ -88,11 +90,15 @@ public byte[] getLVFlattenedBytePage() throws IOException {
8890
}
8991

9092
@Override
91-
public byte[] getComplexChildrenLVFlattenedBytePage() throws IOException {
93+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) throws IOException {
9294
ByteArrayOutputStream stream = new ByteArrayOutputStream();
9395
DataOutputStream out = new DataOutputStream(stream);
9496
for (byte[] byteArrayDatum : byteArrayData) {
95-
out.writeShort((short)byteArrayDatum.length);
97+
if (DataTypeUtil.isByteArrayComplexChildColumn(dataType)) {
98+
out.writeInt(byteArrayDatum.length);
99+
} else {
100+
out.writeShort((short) byteArrayDatum.length);
101+
}
96102
out.write(byteArrayDatum);
97103
}
98104
return stream.toByteArray();

core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.carbondata.core.memory.MemoryBlock;
2525
import org.apache.carbondata.core.memory.MemoryException;
2626
import org.apache.carbondata.core.memory.UnsafeMemoryManager;
27+
import org.apache.carbondata.core.metadata.datatype.DataType;
2728
import org.apache.carbondata.core.metadata.datatype.DataTypes;
2829
import org.apache.carbondata.core.util.ByteUtil;
2930
import org.apache.carbondata.core.util.ThreadLocalTaskInfo;
@@ -396,7 +397,7 @@ public byte[] getLVFlattenedBytePage() {
396397
}
397398

398399
@Override
399-
public byte[] getComplexChildrenLVFlattenedBytePage() {
400+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) {
400401
byte[] data = new byte[totalLength];
401402
CarbonUnsafe.getUnsafe()
402403
.copyMemory(baseAddress, baseOffset, data, CarbonUnsafe.BYTE_ARRAY_OFFSET, totalLength);

core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java

+30-9
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,15 @@ private static ColumnPage getComplexLVBytesColumnPage(TableSpec.ColumnSpec colum
243243
int counter = 0;
244244
// extract Length field in input and calculate total length
245245
for (offset = 0; lvEncodedOffset < lvEncodedBytes.length; offset += length) {
246-
length = ByteUtil.toShort(lvEncodedBytes, lvEncodedOffset);
247-
rowOffset.putInt(counter, offset);
248-
lvEncodedOffset += lvLength + length;
246+
if (lvLength == CarbonCommonConstants.INT_SIZE_IN_BYTE) {
247+
length = ByteUtil.toInt(lvEncodedBytes, lvEncodedOffset);
248+
rowOffset.putInt(counter, offset);
249+
lvEncodedOffset += lvLength + length;
250+
} else {
251+
length = ByteUtil.toShort(lvEncodedBytes, lvEncodedOffset);
252+
rowOffset.putInt(counter, offset);
253+
lvEncodedOffset += lvLength + length;
254+
}
249255
rowId++;
250256
counter++;
251257
}
@@ -465,15 +471,30 @@ public byte[] getLVFlattenedBytePage() throws IOException {
465471
}
466472

467473
@Override
468-
public byte[] getComplexChildrenLVFlattenedBytePage() throws IOException {
474+
public byte[] getComplexChildrenLVFlattenedBytePage(DataType dataType) throws IOException {
469475
// output LV encoded byte array
470476
int offset = 0;
471-
byte[] data = new byte[totalLength + ((rowOffset.getActualRowCount() - 1) * 2)];
477+
int outputLength;
478+
if (dataType == DataTypes.BYTE_ARRAY) {
479+
outputLength = totalLength + ((rowOffset.getActualRowCount() - 1)
480+
* CarbonCommonConstants.INT_SIZE_IN_BYTE);
481+
} else {
482+
outputLength = totalLength + ((rowOffset.getActualRowCount() - 1)
483+
* CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
484+
}
485+
byte[] data = new byte[outputLength];
472486
for (int rowId = 0; rowId < rowOffset.getActualRowCount() - 1; rowId++) {
473-
short length = (short) (rowOffset.getInt(rowId + 1) - rowOffset.getInt(rowId));
474-
ByteUtil.setShort(data, offset, length);
475-
copyBytes(rowId, data, offset + 2, length);
476-
offset += 2 + length;
487+
if (dataType == DataTypes.BYTE_ARRAY) {
488+
int length = rowOffset.getInt(rowId + 1) - rowOffset.getInt(rowId);
489+
ByteUtil.setInt(data, offset, length);
490+
copyBytes(rowId, data, offset + CarbonCommonConstants.INT_SIZE_IN_BYTE, length);
491+
offset += CarbonCommonConstants.INT_SIZE_IN_BYTE + length;
492+
} else {
493+
short length = (short) (rowOffset.getInt(rowId + 1) - rowOffset.getInt(rowId));
494+
ByteUtil.setShort(data, offset, length);
495+
copyBytes(rowId, data, offset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE, length);
496+
offset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + length;
497+
}
477498
}
478499
return data;
479500
}

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java

+5
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ public static EncodedColumnPage[] encodeComplexColumn(ComplexColumnPage input)
194194
while (index < input.getComplexColumnIndex()) {
195195
ColumnPage subColumnPage = input.getColumnPage(index);
196196
encodedPages[index] = encodedColumn(subColumnPage);
197+
// by default add this encoding,
198+
// it is used for checking length of
199+
// complex child byte array columns (short and int)
200+
encodedPages[index].getPageMetadata().getEncoders()
201+
.add(Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY);
197202
index++;
198203
}
199204
return encodedPages;

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer
8383
String compressor, boolean fullVectorFill) throws IOException {
8484
assert (encodings.size() >= 1);
8585
assert (encoderMetas.size() == 1);
86+
boolean isComplexPrimitiveIntLengthEncoding =
87+
encodings.contains(Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY);
8688
Encoding encoding = encodings.get(0);
8789
byte[] encoderMeta = encoderMetas.get(0).array();
8890
ByteArrayInputStream stream = new ByteArrayInputStream(encoderMeta);
@@ -91,7 +93,10 @@ public ColumnPageDecoder createDecoder(List<Encoding> encodings, List<ByteBuffer
9193
ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
9294
metadata.setFillCompleteVector(fullVectorFill);
9395
metadata.readFields(in);
94-
return new DirectCompressCodec(metadata.getStoreDataType()).createDecoder(metadata);
96+
DirectCompressCodec directCompressCodec =
97+
new DirectCompressCodec(metadata.getStoreDataType());
98+
directCompressCodec.setComplexPrimitiveIntLengthEncoding(isComplexPrimitiveIntLengthEncoding);
99+
return directCompressCodec.createDecoder(metadata);
95100
} else if (encoding == ADAPTIVE_INTEGRAL) {
96101
ColumnPageEncoderMeta metadata = new ColumnPageEncoderMeta();
97102
metadata.setFillCompleteVector(fullVectorFill);

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaFloatingCodec.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ public ColumnPageDecoder createDecoder(final ColumnPageEncoderMeta meta) {
128128
@Override
129129
public ColumnPage decode(byte[] input, int offset, int length)
130130
throws MemoryException, IOException {
131-
ColumnPage page = ColumnPage.decompress(meta, input, offset, length, false);
131+
ColumnPage page = ColumnPage.decompress(meta, input, offset, length, false, false);
132132
return LazyColumnPage.newPage(page, converter);
133133
}
134134

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public ColumnPage decode(byte[] input, int offset, int length)
139139
if (DataTypes.isDecimal(meta.getSchemaDataType())) {
140140
page = ColumnPage.decompressDecimalPage(meta, input, offset, length);
141141
} else {
142-
page = ColumnPage.decompress(meta, input, offset, length, false);
142+
page = ColumnPage.decompress(meta, input, offset, length, false, false);
143143
}
144144
return LazyColumnPage.newPage(page, converter);
145145
}

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public ColumnPageDecoder createDecoder(final ColumnPageEncoderMeta meta) {
116116
@Override
117117
public ColumnPage decode(byte[] input, int offset, int length)
118118
throws MemoryException, IOException {
119-
ColumnPage page = ColumnPage.decompress(meta, input, offset, length, false);
119+
ColumnPage page = ColumnPage.decompress(meta, input, offset, length, false, false);
120120
return LazyColumnPage.newPage(page, converter);
121121
}
122122

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public ColumnPage decode(byte[] input, int offset, int length)
116116
if (DataTypes.isDecimal(meta.getSchemaDataType())) {
117117
page = ColumnPage.decompressDecimalPage(meta, input, offset, length);
118118
} else {
119-
page = ColumnPage.decompress(meta, input, offset, length, false);
119+
page = ColumnPage.decompress(meta, input, offset, length, false, false);
120120
}
121121
return LazyColumnPage.newPage(page, converter);
122122
}

core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java

+11-3
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@ public DirectCompressCodec(DataType dataType) {
5858
this.dataType = dataType;
5959
}
6060

61+
boolean isComplexPrimitiveIntLengthEncoding = false;
62+
63+
public void setComplexPrimitiveIntLengthEncoding(boolean complexPrimitiveIntLengthEncoding) {
64+
isComplexPrimitiveIntLengthEncoding = complexPrimitiveIntLengthEncoding;
65+
}
66+
6167
@Override
6268
public String getName() {
6369
return "DirectCompressCodec";
@@ -102,7 +108,8 @@ public ColumnPage decode(byte[] input, int offset, int length)
102108
if (DataTypes.isDecimal(dataType)) {
103109
decodedPage = ColumnPage.decompressDecimalPage(meta, input, offset, length);
104110
} else {
105-
decodedPage = ColumnPage.decompress(meta, input, offset, length, false);
111+
decodedPage = ColumnPage
112+
.decompress(meta, input, offset, length, false, isComplexPrimitiveIntLengthEncoding);
106113
}
107114
return LazyColumnPage.newPage(decodedPage, converter);
108115
}
@@ -150,8 +157,9 @@ public void decodeAndFillVector(byte[] input, int offset, int length,
150157
@Override
151158
public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded)
152159
throws MemoryException, IOException {
153-
return LazyColumnPage
154-
.newPage(ColumnPage.decompress(meta, input, offset, length, isLVEncoded), converter);
160+
return LazyColumnPage.newPage(ColumnPage
161+
.decompress(meta, input, offset, length, isLVEncoded,
162+
isComplexPrimitiveIntLengthEncoding), converter);
155163
}
156164
};
157165
}

core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java

+4
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ private org.apache.carbondata.format.Encoding fromWrapperToExternalEncoding(Enco
120120
return org.apache.carbondata.format.Encoding.BIT_PACKED;
121121
case DIRECT_DICTIONARY:
122122
return org.apache.carbondata.format.Encoding.DIRECT_DICTIONARY;
123+
case INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY:
124+
return org.apache.carbondata.format.Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY;
123125
default:
124126
return org.apache.carbondata.format.Encoding.DICTIONARY;
125127
}
@@ -457,6 +459,8 @@ private Encoding fromExternalToWrapperEncoding(org.apache.carbondata.format.Enco
457459
return Encoding.DIRECT_COMPRESS_VARCHAR;
458460
case BIT_PACKED:
459461
return Encoding.BIT_PACKED;
462+
case INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY:
463+
return Encoding.INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY;
460464
case DIRECT_DICTIONARY:
461465
return Encoding.DIRECT_DICTIONARY;
462466
default:

core/src/main/java/org/apache/carbondata/core/metadata/encoder/Encoding.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public enum Encoding {
3838
ADAPTIVE_FLOATING,
3939
BOOL_BYTE,
4040
ADAPTIVE_DELTA_FLOATING,
41-
DIRECT_COMPRESS_VARCHAR;
41+
DIRECT_COMPRESS_VARCHAR,
42+
INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY;
4243

4344
public static Encoding valueOf(int ordinal) {
4445
if (ordinal == DICTIONARY.ordinal()) {
@@ -73,6 +74,8 @@ public static Encoding valueOf(int ordinal) {
7374
return ADAPTIVE_DELTA_FLOATING;
7475
} else if (ordinal == DIRECT_COMPRESS_VARCHAR.ordinal()) {
7576
return DIRECT_COMPRESS_VARCHAR;
77+
} else if (ordinal == INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY.ordinal()) {
78+
return INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY;
7679
} else {
7780
throw new RuntimeException("create Encoding with invalid ordinal: " + ordinal);
7881
}

core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java

+11-2
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,11 @@ public void parseBlocksAndReturnComplexColumnByteArray(DimensionRawColumnChunk[]
107107
byte[] currentVal =
108108
copyBlockDataChunk(rawColumnChunks, dimensionColumnPages, rowNumber, pageNumber);
109109
if (!this.isDictionary && !this.isDirectDictionary) {
110-
dataOutputStream.writeShort(currentVal.length);
110+
if (DataTypeUtil.isByteArrayComplexChildColumn(dataType)) {
111+
dataOutputStream.writeInt(currentVal.length);
112+
} else {
113+
dataOutputStream.writeShort(currentVal.length);
114+
}
111115
}
112116
dataOutputStream.write(currentVal);
113117
}
@@ -158,7 +162,12 @@ private Object getDataObject(ByteBuffer dataBuffer, int size) {
158162
actualData = directDictionaryGenerator.getValueFromSurrogate(surrgateValue);
159163
} else if (!isDictionary) {
160164
if (size == -1) {
161-
size = dataBuffer.getShort();
165+
if (DataTypeUtil.isByteArrayComplexChildColumn(dataType)) {
166+
size = dataBuffer.getInt();
167+
} else {
168+
size = dataBuffer.getShort();
169+
}
170+
162171
}
163172
byte[] value = new byte[size];
164173
dataBuffer.get(value, 0, size);

core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java

+1
Original file line numberDiff line numberDiff line change
@@ -3225,6 +3225,7 @@ public static boolean isEncodedWithMeta(List<org.apache.carbondata.format.Encodi
32253225
case ADAPTIVE_DELTA_INTEGRAL:
32263226
case ADAPTIVE_FLOATING:
32273227
case ADAPTIVE_DELTA_FLOATING:
3228+
case INT_LENGTH_COMPLEX_CHILD_BYTE_ARRAY:
32283229
return true;
32293230
}
32303231
}

core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java

+14
Original file line numberDiff line numberDiff line change
@@ -1125,4 +1125,18 @@ public static boolean isPrimitiveColumn(DataType dataType) {
11251125
return false;
11261126
}
11271127

1128+
/**
1129+
* utility function to check complex column child columns that can exceed 32000 length
1130+
*
1131+
* @param dataType
1132+
* @return
1133+
*/
1134+
public static boolean isByteArrayComplexChildColumn(DataType dataType) {
1135+
return ((dataType == DataTypes.STRING) ||
1136+
(dataType == DataTypes.VARCHAR) ||
1137+
(dataType == DataTypes.BINARY) ||
1138+
(dataType == DataTypes.DATE) ||
1139+
DataTypes.isDecimal(dataType) ||
1140+
(dataType == DataTypes.BYTE_ARRAY));
1141+
}
11281142
}

0 commit comments

Comments
 (0)