Skip to content

Commit 4af464c

Browse files
authored
GH-721: Allow using 1GB+ data buffers in variable width vectors (#722)
## What's Changed Allow actually reaching MAX_BUFFER_SIZE when reallocating variable-width vectors, instead of exceeding it when calculating the next power of 2. For unit testing, the maximum allocation size has been increased to 2MB - 1 byte to simulate the default maximum behavior. This change required some updates to existing unit tests because of the round-ups used when calculating the required buffer sizes. Closes #721.
1 parent 4b4d928 commit 4af464c

File tree

6 files changed

+40
-10
lines changed

6 files changed

+40
-10
lines changed

pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,8 @@ under the License.
327327
<io.netty.tryReflectionSetAccessible>true</io.netty.tryReflectionSetAccessible>
328328
<user.timezone>UTC</user.timezone>
329329
<!-- Note: changing the below configuration might increase the max allocation size for a vector
330-
which in turn can cause OOM. -->
331-
<arrow.vector.max_allocation_bytes>1048576</arrow.vector.max_allocation_bytes>
330+
which in turn can cause OOM. Using 2MB - 1 byte to simulate the default limit of 2^31 - 1 bytes. -->
331+
<arrow.vector.max_allocation_bytes>2097151</arrow.vector.max_allocation_bytes>
332332
</systemPropertyVariables>
333333
<useModulePath>false</useModulePath>
334334
</configuration>

vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -571,10 +571,13 @@ public void reallocDataBuffer(long desiredAllocSize) {
571571
return;
572572
}
573573

574-
final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
574+
final long newAllocationSize =
575+
Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE);
575576
assert newAllocationSize >= 1;
576577

577-
checkDataBufferSize(newAllocationSize);
578+
if (newAllocationSize < desiredAllocSize) {
579+
checkDataBufferSize(desiredAllocSize);
580+
}
578581

579582
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
580583
newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity());

vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java

+10-4
Original file line numberDiff line numberDiff line change
@@ -550,15 +550,18 @@ public void reallocViewBuffer(long desiredAllocSize) {
550550
if (desiredAllocSize == 0) {
551551
return;
552552
}
553-
long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
553+
long newAllocationSize = Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE);
554554
assert newAllocationSize >= 1;
555555

556-
checkDataBufferSize(newAllocationSize);
557556
// for each set operation, we have to allocate 16 bytes
558557
// here we are adjusting the desired allocation-based allocation size
559558
// to align with the 16bytes requirement.
560559
newAllocationSize = roundUpToMultipleOf16(newAllocationSize);
561560

561+
if (newAllocationSize < desiredAllocSize) {
562+
checkDataBufferSize(desiredAllocSize);
563+
}
564+
562565
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
563566
newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity());
564567

@@ -587,10 +590,13 @@ public void reallocViewDataBuffer(long desiredAllocSize) {
587590
return;
588591
}
589592

590-
final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
593+
final long newAllocationSize =
594+
Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE);
591595
assert newAllocationSize >= 1;
592596

593-
checkDataBufferSize(newAllocationSize);
597+
if (newAllocationSize < desiredAllocSize) {
598+
checkDataBufferSize(desiredAllocSize);
599+
}
594600

595601
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
596602
dataBuffers.add(newBuf);

vector/src/test/java/org/apache/arrow/vector/TestValueVector.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public void init() {
9595
private static final byte[] STR5 = "EEE5".getBytes(utf8Charset);
9696
private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
9797
private static final int MAX_VALUE_COUNT =
98-
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7);
98+
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 9);
9999
private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2);
100100

101101
@AfterEach

vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java

+12
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.nio.charset.StandardCharsets;
2525
import org.apache.arrow.memory.BufferAllocator;
2626
import org.apache.arrow.memory.RootAllocator;
27+
import org.apache.arrow.memory.util.CommonUtil;
2728
import org.apache.arrow.vector.complex.DenseUnionVector;
2829
import org.apache.arrow.vector.complex.FixedSizeListVector;
2930
import org.apache.arrow.vector.complex.ListVector;
@@ -222,6 +223,17 @@ public void testVariableAllocateAfterReAlloc() throws Exception {
222223
}
223224
}
224225

226+
@Test
227+
public void testVariableReAllocAbove1GB() throws Exception {
228+
try (final VarCharVector vector = new VarCharVector("", allocator)) {
229+
long desiredSizeAboveLastPowerOf2 =
230+
CommonUtil.nextPowerOfTwo(BaseVariableWidthVector.MAX_ALLOCATION_SIZE) / 2 + 1;
231+
vector.reallocDataBuffer(desiredSizeAboveLastPowerOf2);
232+
233+
assertTrue(vector.getDataBuffer().capacity() >= desiredSizeAboveLastPowerOf2);
234+
}
235+
}
236+
225237
@Test
226238
public void testLargeVariableAllocateAfterReAlloc() throws Exception {
227239
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {

vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.stream.Stream;
2929
import org.apache.arrow.memory.BufferAllocator;
3030
import org.apache.arrow.memory.RootAllocator;
31+
import org.apache.arrow.memory.util.CommonUtil;
3132
import org.apache.arrow.vector.BaseValueVector;
3233
import org.apache.arrow.vector.BaseVariableWidthViewVector;
3334
import org.apache.arrow.vector.BigIntVector;
@@ -309,7 +310,15 @@ public void testAppendEmptyVariableWidthVector() {
309310

310311
@Test
311312
public void testAppendLargeAndSmallVariableVectorsWithinLimit() {
312-
int sixteenthOfMaxAllocation = Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16);
313+
// Use the largest power-of-two allocation size to avoid hitting the max limit when sizes are rounded up
314+
long maxPowerOfTwoAllocationSize =
315+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE);
316+
if (maxPowerOfTwoAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
317+
maxPowerOfTwoAllocationSize =
318+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE / 2);
319+
}
320+
321+
int sixteenthOfMaxAllocation = Math.toIntExact(maxPowerOfTwoAllocationSize / 16);
313322
try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation);
314323
VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) {
315324
new VectorAppender(delta).visit(target, null);

0 commit comments

Comments
 (0)