Skip to content

Commit 363997d

Browse files
committed
performance: align structures for 64-bit platforms
- _tagCLGLBufferInfo 56 -> 48 bytes
- InlineSamplerBaseT 16 -> 12 bytes
- CsrSelectionArgs 88 -> 80 bytes
- EngineInfo 64 -> 48 bytes
- FragmentStorage 48 -> 40 bytes
- SvmFreeUserData 40 -> 32 bytes
- PayloadArgumentBaseT 40 -> 36 bytes
- ImageDescriptor 72 -> 64 bytes
- PerThreadMemoryBufferBaseT 16 -> 12 bytes
- CopyEngineState 24 -> 16 bytes
- BatchBuffer 120 -> 112 bytes
- DebugMetadata 32 -> 24 bytes
- ImmediateFlushData 48 -> 40 bytes
1 parent c5d541d commit 363997d

17 files changed

+44
-43
lines changed

opencl/extensions/public/cl_gl_private_intel_structures.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,15 @@ typedef struct _tagGLCLResourceInfo {
4848
} GL_CL_RESOURCE_INFO, *PGL_CL_RESOURCE_INFO;
4949

5050
typedef struct _tagCLGLBufferInfo {
51-
GLenum bufferName;
52-
unsigned int globalShareHandle;
5351
GMM_RESOURCE_INFO *pGmmResInfo; /// Pointer to GMMResInfo from GL that will be copied in CL (GL)
5452
GLvoid *pSysMem;
53+
GLvoid *pReleaseData;
54+
GLenum bufferName;
55+
unsigned int globalShareHandle;
5556
GLint bufferSize;
5657
GLint bufferOffset;
57-
GLboolean oglSynchronized;
5858
GMM_STATUS status;
59-
GLvoid *pReleaseData;
59+
GLboolean oglSynchronized;
6060
GLboolean createOrDestroy;
6161
} CL_GL_BUFFER_INFO, *PCL_GL_BUFFER_INFO;
6262

opencl/source/command_queue/copy_engine_state.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313

1414
namespace NEO {
1515
struct CopyEngineState {
16-
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
1716
TaskCountType taskCount = 0;
17+
aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES;
1818
bool csrClientRegistered = false;
1919

2020
bool isValid() const {

opencl/source/command_queue/csr_selection_args.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,21 @@ struct CsrSelectionArgs {
2424
const size_t *imageOrigin = nullptr;
2525
};
2626

27-
cl_command_type cmdType;
28-
const size_t *size = nullptr;
2927
Resource srcResource;
3028
Resource dstResource;
29+
const size_t *size = nullptr;
30+
cl_command_type cmdType;
3131
TransferDirection direction;
3232

3333
CsrSelectionArgs(cl_command_type cmdType, const size_t *size)
34-
: cmdType(cmdType),
35-
size(size),
34+
: size(size),
35+
cmdType(cmdType),
3636
direction(TransferDirection::hostToHost) {}
3737

3838
template <typename ResourceType>
3939
CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size)
40-
: cmdType(cmdType),
41-
size(size) {
40+
: size(size),
41+
cmdType(cmdType) {
4242
if (src) {
4343
processResource(*src, rootDeviceIndex, this->srcResource);
4444
}

opencl/source/command_queue/enqueue_svm.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue,
2525
void *userData);
2626

2727
struct SvmFreeUserData {
28-
cl_uint numSvmPointers;
2928
void **svmPointers;
3029
SvmFreeClbT clb;
3130
void *userData;
31+
cl_uint numSvmPointers;
3232
bool ownsEventDeletion;
3333

3434
SvmFreeUserData(cl_uint numSvmPointers,
3535
void **svmPointers, SvmFreeClbT clb,
3636
void *userData,
3737
bool ownsEventDeletion)
38-
: numSvmPointers(numSvmPointers),
39-
svmPointers(svmPointers),
38+
: svmPointers(svmPointers),
4039
clb(clb),
4140
userData(userData),
41+
numSvmPointers(numSvmPointers),
4242
ownsEventDeletion(ownsEventDeletion){};
4343
};
4444

opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW
12511251
uint32_t gpgpuTaskCount = 123;
12521252
uint32_t bcsTaskCount = 123;
12531253

1254-
CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount};
1254+
CopyEngineState bcsState{bcsTaskCount, bcsCsr->getOsContext().getEngineType()};
12551255
commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false);
12561256

12571257
EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());

opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ HWTEST_F(CommandQueueHwTest, whenCallingIsCompletedThenTestTaskCountValue) {
5555
bcsCsr->setupContext(*osContext);
5656
bcsCsr->initializeTagAllocation();
5757
EngineControl control(bcsCsr.get(), osContext.get());
58-
CopyEngineState state{aub_stream::EngineType::ENGINE_BCS, 1, false};
58+
CopyEngineState state{1, aub_stream::EngineType::ENGINE_BCS, false};
5959

6060
MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
6161

opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC
4747
cmdQ.clearBcsEngines();
4848
cmdQ.bcsEngines[0] = &engineControl1;
4949

50-
cmdQ.bcsStates[0] = {aub_stream::ENGINE_BCS, 0, false};
50+
cmdQ.bcsStates[0] = {0, aub_stream::ENGINE_BCS, false};
5151

5252
cl_int error = CL_SUCCESS;
5353
std::unique_ptr<Buffer> buffer(Buffer::create(&context, 0, 1, nullptr, error));

opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenAllBcsEnginesReadyWhenWaitingForEventThe
619619
ultCsr2.initializeTagAllocation();
620620
ultCsr2.setupContext(osContext);
621621

622-
CopyEngineState copyEngineState = {aub_stream::EngineType::ENGINE_BCS2, 2, false};
622+
CopyEngineState copyEngineState = {2, aub_stream::EngineType::ENGINE_BCS2, false};
623623
EngineControl engineControl = {&ultCsr2, &osContext};
624624
auto bcs2Index = EngineHelpers::getBcsIndex(aub_stream::EngineType::ENGINE_BCS2);
625625
mockCmdQ->bcsStates[bcs2Index] = copyEngineState;

opencl/test/unit_test/mem_obj/image_tests.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1559,7 +1559,7 @@ TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDe
15591559
}
15601560

15611561
TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) {
1562-
ImageDescriptor desc = {ImageType::image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false};
1562+
ImageDescriptor desc = {16, 24, 1, 1, 1024, 2048, ImageType::image2D, 1, 3, false};
15631563
auto clDesc = Image::convertDescriptor(desc);
15641564

15651565
EXPECT_EQ(clDesc.image_type, static_cast<cl_mem_object_type>(CL_MEM_OBJECT_IMAGE2D));

shared/source/command_stream/command_stream_receiver_hw.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
2525
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
2626

2727
struct ImmediateFlushData {
28-
PipelineSelectArgs pipelineSelectArgs{};
29-
size_t estimatedSize = 0;
3028
void *endPtr = nullptr;
29+
size_t estimatedSize = 0;
3130
size_t csrStartOffset = 0;
31+
PipelineSelectArgs pipelineSelectArgs{};
3232

3333
bool pipelineSelectFullConfigurationNeeded = false;
3434
bool pipelineSelectDirty = false;

shared/source/command_stream/command_stream_receiver_simulated_common_hw.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw<Gf
8181

8282
struct EngineInfo {
8383
void *pLRCA;
84-
uint32_t ggttLRCA;
8584
void *pGlobalHWStatusPage;
86-
uint32_t ggttHWSP;
8785
void *pRingBuffer;
88-
uint32_t ggttRingBuffer;
8986
size_t sizeRingBuffer;
87+
uint32_t ggttLRCA;
88+
uint32_t ggttHWSP;
89+
uint32_t ggttRingBuffer;
9090
uint32_t tailRingBuffer;
9191
} engineInfo = {};
9292

shared/source/command_stream/submissions_aggregator.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_
9999
size_t usedSize, LinearStream *stream, void *endCmdPtr, uint32_t numCsrClients, bool hasStallingCmds,
100100
bool hasRelaxedOrderingDependencies, bool dispatchMonitorFence, bool taskCountUpdateOnly)
101101
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
102-
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
103-
lowPriority(lowPriority),
104-
throttle(throttle), sliceCount(sliceCount),
105-
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds),
106-
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies), dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly) {}
102+
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), stream(stream), endCmdPtr(endCmdPtr),
103+
numCsrClients(numCsrClients), hasStallingCmds(hasStallingCmds), hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies),
104+
dispatchMonitorFence(dispatchMonitorFence), taskCountUpdateOnly(taskCountUpdateOnly), lowPriority(lowPriority), throttle(throttle),
105+
chainedBatchBuffer(chainedBatchBuffer), sliceCount(sliceCount),
106+
usedSize(usedSize) {}
107107

108108
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
109109
flushStamp.reset(new FlushStampTracker(false));

shared/source/command_stream/submissions_aggregator.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -46,30 +46,31 @@ struct BatchBuffer {
4646
bool dispatchMonitorFence,
4747
bool taskCountUpdateOnly);
4848
BatchBuffer() {}
49+
50+
PagingFenceSemaphoreInfo pagingFenceSemInfo{};
51+
4952
GraphicsAllocation *commandBufferAllocation = nullptr;
5053
ResidencyContainer *allocationsForResidency = nullptr;
5154
size_t startOffset = 0u;
5255
size_t chainedBatchBufferStartOffset = 0u;
5356
uint64_t taskStartAddress = 0; // if task not available, use CSR stream
5457

55-
GraphicsAllocation *chainedBatchBuffer = nullptr;
56-
bool lowPriority = false;
57-
QueueThrottle throttle = QueueThrottle::MEDIUM;
58-
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
59-
size_t usedSize = 0u;
60-
6158
// only used in drm csr in gem close worker active mode
6259
LinearStream *stream = nullptr;
6360
void *endCmdPtr = nullptr;
6461
uint32_t numCsrClients = 0;
6562

66-
PagingFenceSemaphoreInfo pagingFenceSemInfo{};
67-
6863
bool hasStallingCmds = false;
6964
bool hasRelaxedOrderingDependencies = false;
7065
bool disableFlatRingBuffer = false;
7166
bool dispatchMonitorFence = false;
7267
bool taskCountUpdateOnly = false;
68+
69+
bool lowPriority = false;
70+
QueueThrottle throttle = QueueThrottle::MEDIUM;
71+
GraphicsAllocation *chainedBatchBuffer = nullptr;
72+
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
73+
size_t usedSize = 0u;
7374
};
7475

7576
struct CommandBuffer : public IDNode<CommandBuffer> {

shared/source/device_binary_format/zebin/zeinfo.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -634,12 +634,12 @@ inline constexpr BtiValueT btiValue = -1;
634634
} // namespace Defaults
635635

636636
struct PayloadArgumentBaseT {
637-
ArgTypeT argType = argTypeUnknown;
638637
OffsetT offset = Defaults::offset;
639638
SourceOffseT sourceOffset = Defaults::sourceOffset;
640639
SizeT size = 0;
641640
ArgIndexT argIndex = Defaults::argIndex;
642641
BtiValueT btiValue = Defaults::btiValue;
642+
ArgTypeT argType = argTypeUnknown;
643643
AddrmodeT addrmode = memoryAddressingModeUnknown;
644644
AddrspaceT addrspace = addressSpaceUnknown;
645645
AccessTypeT accessType = accessTypeUnknown;
@@ -692,9 +692,9 @@ inline constexpr Slot slot = 0U;
692692
} // namespace Defaults
693693

694694
struct PerThreadMemoryBufferBaseT {
695+
SizeT size = 0U;
695696
AllocationType allocationType = AllocationTypeUnknown;
696697
MemoryUsageT memoryUsage = MemoryUsageUnknown;
697-
SizeT size = 0U;
698698
IsSimtThreadT isSimtThread = Defaults::isSimtThread;
699699
Slot slot = Defaults::slot;
700700
};
@@ -732,8 +732,8 @@ inline constexpr NormalizedT normalized = false;
732732

733733
struct InlineSamplerBaseT {
734734
SamplerIndexT samplerIndex = Defaults::samplerIndex;
735-
AddrModeT addrMode = Defaults::addrMode;
736735
FilterModeT filterMode = Defaults::filterMode;
736+
AddrModeT addrMode = Defaults::addrMode;
737737
NormalizedT normalized = Defaults::normalized;
738738
};
739739
} // namespace InlineSamplers

shared/source/helpers/surface_format_info.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,13 @@ enum class ImageType {
218218
};
219219

220220
struct ImageDescriptor {
221-
ImageType imageType;
222221
size_t imageWidth;
223222
size_t imageHeight;
224223
size_t imageDepth;
225224
size_t imageArraySize;
226225
size_t imageRowPitch;
227226
size_t imageSlicePitch;
227+
ImageType imageType;
228228
uint32_t numMipLevels;
229229
uint32_t numSamples;
230230
bool fromParent;

shared/source/memory_manager/host_ptr_defines.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ struct AllocationRequirements {
5151
struct FragmentStorage {
5252
const void *fragmentCpuPointer = nullptr;
5353
size_t fragmentSize = 0;
54-
int refCount = 0;
5554
OsHandle *osInternalStorage = nullptr;
5655
ResidencyData *residency = nullptr;
56+
int refCount = 0;
5757
bool driverAllocation = false;
5858
};
5959

shared/source/os_interface/linux/xe/ioctl_helper_xe.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,9 @@ class IoctlHelperXe : public IoctlHelper {
190190

191191
std::unique_ptr<XeDrm::drm_xe_engine_class_instance> defaultEngine;
192192
struct DebugMetadata {
193-
DrmResourceClass type;
194193
uint64_t offset;
195194
uint64_t size;
195+
DrmResourceClass type;
196196
bool isCookie;
197197
};
198198

0 commit comments

Comments
 (0)