Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bc16613
Preparing existing profiler for adding concurrent profiling
jainankitk Mar 27, 2025
468fd78
Adding apache license header to AbstractQueryProfilerBreakdown
jainankitk Mar 27, 2025
67c2449
Tidying the code
jainankitk Mar 27, 2025
c4565e2
Adding the missing javadoc
jainankitk Mar 27, 2025
8917ec6
Tidying the javadoc
jainankitk Mar 27, 2025
c310173
Unifying code execution for concurrent and non-concurrent profiling
jainankitk Mar 28, 2025
5d5d9f7
Fixing the import list
jainankitk Mar 28, 2025
0c06174
Fixing the unit test issue
jainankitk Mar 28, 2025
821c5dd
Fixing :lucene:sandbox:ecjLintMain
jainankitk Mar 28, 2025
f2ec1f2
Separating query timers at global/slice level
jainankitk Apr 2, 2025
6db3716
Interleaving slice level information within the query profile tree
jainankitk Apr 3, 2025
da12f97
Fixing unit tests to accommodate slice level breakdowns
jainankitk Apr 3, 2025
ea34e3f
Removing unnecessary null check
jainankitk Apr 3, 2025
ec0c71a
Merge branch 'main' into profiling-changes
jainankitk Apr 6, 2025
82f1f8d
Mention threadId explicitly instead of sliceId
jainankitk Apr 6, 2025
3c9794b
Replacing few more references
jainankitk Apr 6, 2025
1d57aca
Tidying the code
jainankitk Apr 6, 2025
239fa9b
Merge branch 'main' into profiling-changes
jainankitk Apr 14, 2025
74be558
Adding interface for plugging in different leaf level breakdown aggre…
jainankitk Apr 14, 2025
18793b8
Tidying the code
jainankitk Apr 14, 2025
984215f
Adding apache license header
jainankitk Apr 14, 2025
6cf5276
Adding getter for aggregation type
jainankitk Apr 14, 2025
6a85b10
Merge branch 'main' into profiling-changes
jainankitk Jul 21, 2025
a051dab
Addressing review comments
jainankitk Jul 28, 2025
e809c55
Tidying the code
jainankitk Jul 28, 2025
e9c7191
Adding changelog entry
jainankitk Jul 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ New Features
* GITHUB#14404: Introducing DocValuesMultiRangeQuery.SortedNumericStabbingBuilder into sandbox.
(Mikhail Khludnev)

* GITHUB#14413: Adding profiling support for concurrent segment search. (Ankit Jain)

* GITHUB#14565: Add ParentsChildrenBlockJoinQuery that supports parent and child filter in the same query
along with limiting number of child documents to retrieve per parent. (Jinny Wang)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.search;

import java.util.Collections;
import java.util.Map;
import java.util.Objects;

/**
* This class is the internal representation of a profiled Query, corresponding to a single node in
* the query tree. It is built after the query has finished executing and is merely a structured
* representation, rather than the entity that collects the timing profile.
*/
public class AggregatedQueryLeafProfilerResult {
private final Thread thread;
private final Map<String, Long> breakdown;
private final long startTime;
private final long totalTime;

public AggregatedQueryLeafProfilerResult(
Thread thread, Map<String, Long> breakdown, long startTime, long totalTime) {
this.thread = thread;
this.breakdown =
Collections.unmodifiableMap(
Objects.requireNonNull(breakdown, "required breakdown argument missing"));
this.startTime = startTime;
this.totalTime = totalTime;
}

/**
* Returns the thread that executed query for these leaves
*
* @return thread that executed query node
*/
public Thread getThread() {
return thread;
}

/**
* Returns the timing breakdown for this node.
*
* @return map containing time breakdown across different operation types
*/
public Map<String, Long> getTimeBreakdown() {
return breakdown;
}

/**
* Returns the start time for this query node execution.
*
* @return start time in nanoseconds
*/
public long getStartTime() {
return startTime;
}

/**
* Returns the total time (inclusive of children) for this query node.
*
* @return elapsed time in nanoseconds
*/
public long getTotalTime() {
return totalTime;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.search;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.util.CollectionUtil;

/**
* A record of timings for the various operations that may happen during query execution. A node's
* time may be composed of several internal attributes (rewriting, weighting, scoring, etc).
*/
public class QueryLeafProfilerBreakdown {
private static final Collection<QueryProfilerTimingType> LEAF_LEVEL_TIMING_TYPE =
Arrays.stream(QueryProfilerTimingType.values()).filter(t -> t.isLeafLevel()).toList();

/** The accumulated timings for this query node */
private final QueryProfilerTimer[] timers;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about using an EnumMap, which will be implemented by an array like this one under the hood?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although minor, but EnumMap will initialize the timers for all the possible keys vals = new Object[keyUniverse.length];. It seemed bit unnecessary to me, but we can change if using EnumMap improves the readability.


EnumMap<QueryProfilerTimingType, QueryProfilerTimer> map;

/** Sole constructor. */
public QueryLeafProfilerBreakdown() {
timers = new QueryProfilerTimer[LEAF_LEVEL_TIMING_TYPE.size()];
for (int i = 0; i < timers.length; ++i) {
timers[i] = new QueryProfilerTimer();
}
}

public QueryProfilerTimer getTimer(QueryProfilerTimingType type) {
return timers[type.ordinal()];
}

/** Build a timing count breakdown. */
public final Map<String, Long> toBreakdownMap() {
Map<String, Long> map = CollectionUtil.newHashMap(timers.length * 2);
for (QueryProfilerTimingType type : LEAF_LEVEL_TIMING_TYPE) {
map.put(type.toString(), timers[type.ordinal()].getApproximateTiming());
map.put(type.toString() + "_count", timers[type.ordinal()].getCount());
}
return Collections.unmodifiableMap(map);
}

public final AggregatedQueryLeafProfilerResult getLeafProfilerResult(Thread thread) {
final Map<String, Long> map = CollectionUtil.newHashMap(LEAF_LEVEL_TIMING_TYPE.size() * 2);
long sliceStartTime = Long.MAX_VALUE;
long sliceEndTime = Long.MIN_VALUE;
for (QueryProfilerTimingType type : LEAF_LEVEL_TIMING_TYPE) {
final QueryProfilerTimer timer = timers[type.ordinal()];
// Consider timer for updating start/total time only
// if it was used
if (timer.getCount() > 0) {
sliceStartTime = Math.min(sliceStartTime, timer.getEarliestTimerStartTime());
sliceEndTime =
Math.max(
sliceEndTime, timer.getEarliestTimerStartTime() + timer.getApproximateTiming());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't feel right the compute the end time as the start time plus the approximate timing, since operations will often be interleaved. What about reporting the sum of the approximate timings across operation types instead, ie. the value of toTotalTime()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! That was an oversight from my side. Will change this toTotalTime(), which should approximately reflect the consumed cpu time for processing this query

}
map.put(type.toString(), timer.getApproximateTiming());
map.put(type.toString() + "_count", timer.getCount());
}
return new AggregatedQueryLeafProfilerResult(
thread, map, sliceStartTime, sliceEndTime - sliceStartTime);
}

public final long toTotalTime() {
long total = 0;
for (QueryProfilerTimer timer : timers) {
total += timer.getApproximateTiming();
}
return total;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.sandbox.search;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/** QueryLeafProfilerAggregator that aggregates leaf breakdowns at thread level */
class QueryLeafProfilerThreadAggregator {
private final ConcurrentMap<Thread, QueryLeafProfilerBreakdown> queryThreadBreakdowns;
private long queryStartTime = Long.MAX_VALUE;
private long queryTotalTime = 0;

public QueryLeafProfilerThreadAggregator() {
queryThreadBreakdowns = new ConcurrentHashMap<>();
}

public long getQueryStartTime() {
return queryStartTime;
}

public long getQueryTotalTime() {
return queryTotalTime;
}

private QueryLeafProfilerBreakdown getQueryThreadProfilerBreakdown() {
final Thread currentThread = Thread.currentThread();
// See please https://bugs.openjdk.java.net/browse/JDK-8161372
final QueryLeafProfilerBreakdown profilerBreakdown = queryThreadBreakdowns.get(currentThread);

if (profilerBreakdown != null) {
return profilerBreakdown;
}

return queryThreadBreakdowns.computeIfAbsent(
currentThread, _ -> new QueryLeafProfilerBreakdown());
}

public QueryProfilerTimer getTimer(QueryProfilerTimingType timingType) {
assert timingType.isLeafLevel();

return getQueryThreadProfilerBreakdown().getTimer(timingType);
}

public List<AggregatedQueryLeafProfilerResult> getAggregatedQueryLeafProfilerResults() {
final List<AggregatedQueryLeafProfilerResult> profilerResults = new ArrayList<>();
for (Thread thread : queryThreadBreakdowns.keySet()) {
final AggregatedQueryLeafProfilerResult aggregatedQueryLeafProfilerResult =
queryThreadBreakdowns.get(thread).getLeafProfilerResult(thread);
queryStartTime = Math.min(queryStartTime, aggregatedQueryLeafProfilerResult.getStartTime());
queryTotalTime += aggregatedQueryLeafProfilerResult.getTotalTime();
profilerResults.add(aggregatedQueryLeafProfilerResult);
}

return profilerResults;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,46 +17,89 @@

package org.apache.lucene.sandbox.search;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.CollectionUtil;

/**
* A record of timings for the various operations that may happen during query execution. A node's
* time may be composed of several internal attributes (rewriting, weighting, scoring, etc).
*/
class QueryProfilerBreakdown {
private static final Collection<QueryProfilerTimingType> QUERY_LEVEL_TIMING_TYPE =
Arrays.stream(QueryProfilerTimingType.values()).filter(t -> !t.isLeafLevel()).toList();
private final Map<QueryProfilerTimingType, QueryProfilerTimer> queryProfilerTimers;

/** The accumulated timings for this query node */
private final QueryProfilerTimer[] timers;
private final QueryLeafProfilerThreadAggregator queryLeafProfilerAggregator;

/** Sole constructor. */
public QueryProfilerBreakdown() {
timers = new QueryProfilerTimer[QueryProfilerTimingType.values().length];
for (int i = 0; i < timers.length; ++i) {
timers[i] = new QueryProfilerTimer();
queryProfilerTimers = new HashMap<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or use an EnumMap?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jpountz. This is a good suggestion, and I missed that while making the changes. But when I started using it, I was curious about the implementation and saw that it initializes reference for all the Enum values during initialization:

public EnumMap(Class<K> keyType) {
        this.keyType = keyType;
        keyUniverse = getKeyUniverse(keyType);
        vals = new Object[keyUniverse.length];
    }

Since we are using small subset of QueryProfilerTimingType, I am wondering if this is a good idea. We can also consider separating QueryProfilerTimingType on the basis of slice level or query level.

this.queryLeafProfilerAggregator = new QueryLeafProfilerThreadAggregator();

for (QueryProfilerTimingType timingType : QUERY_LEVEL_TIMING_TYPE) {
queryProfilerTimers.put(timingType, new QueryProfilerTimer());
}
}

public QueryProfilerTimer getTimer(QueryProfilerTimingType type) {
return timers[type.ordinal()];
public final QueryProfilerTimer getTimer(QueryProfilerTimingType timingType) {
if (timingType.isLeafLevel()) {
return queryLeafProfilerAggregator.getTimer(timingType);
}

// Return the query level profiler timer if not
// slice level
return queryProfilerTimers.get(timingType);
}

/** Build a timing count breakdown. */
public final Map<String, Long> toBreakdownMap() {
Map<String, Long> map = CollectionUtil.newHashMap(timers.length * 2);
for (QueryProfilerTimingType type : QueryProfilerTimingType.values()) {
map.put(type.toString(), timers[type.ordinal()].getApproximateTiming());
map.put(type.toString() + "_count", timers[type.ordinal()].getCount());
public final QueryProfilerResult getQueryProfilerResult(
Query query, List<QueryProfilerResult> childrenProfileResults) {
long queryStartTime = Long.MAX_VALUE;
long queryTotalTime = 0;
final Map<String, Long> breakdownMap =
CollectionUtil.newHashMap(QUERY_LEVEL_TIMING_TYPE.size() * 2);
for (QueryProfilerTimingType type : QUERY_LEVEL_TIMING_TYPE) {
final QueryProfilerTimer timer = queryProfilerTimers.get(type);
if (timer.getCount() > 0) {
queryStartTime = Math.min(queryStartTime, timer.getEarliestTimerStartTime());
queryTotalTime += timer.getApproximateTiming();
}
// TODO: Should we put only non-zero timer values in the final output?
breakdownMap.put(type.toString(), queryProfilerTimers.get(type).getApproximateTiming());
breakdownMap.put(type.toString() + "_count", queryProfilerTimers.get(type).getCount());
}
return Collections.unmodifiableMap(map);

final List<AggregatedQueryLeafProfilerResult> threadProfilerResults =
queryLeafProfilerAggregator.getAggregatedQueryLeafProfilerResults();
queryStartTime = Math.min(queryStartTime, queryLeafProfilerAggregator.getQueryStartTime());
queryTotalTime += queryLeafProfilerAggregator.getQueryTotalTime();

return new QueryProfilerResult(
getTypeFromQuery(query),
getDescriptionFromQuery(query),
Collections.unmodifiableMap(breakdownMap),
threadProfilerResults,
childrenProfileResults,
queryStartTime,
queryTotalTime);
}

public final long toTotalTime() {
long total = 0;
for (QueryProfilerTimer timer : timers) {
total += timer.getApproximateTiming();
private String getTypeFromQuery(Query query) {
// Anonymous classes won't have a name,
// we need to get the super class
if (query.getClass().getSimpleName().isEmpty()) {
return query.getClass().getSuperclass().getSimpleName();
}
return total;
return query.getClass().getSimpleName();
}

private String getDescriptionFromQuery(Query query) {
return query.toString();
}
}
Loading
Loading