-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Adding profiling support for concurrent segment search #14413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bc16613
468fd78
67c2449
c4565e2
8917ec6
c310173
5d5d9f7
0c06174
821c5dd
f2ec1f2
6db3716
da12f97
ea34e3f
ec0c71a
82f1f8d
3c9794b
1d57aca
239fa9b
74be558
18793b8
984215f
6cf5276
6a85b10
a051dab
e809c55
e9c7191
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.lucene.sandbox.search; | ||
|
|
||
| import java.util.Collections; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
|
|
||
| /** | ||
| * This class is the internal representation of a profiled Query, corresponding to a single node in | ||
| * the query tree. It is built after the query has finished executing and is merely a structured | ||
| * representation, rather than the entity that collects the timing profile. | ||
| */ | ||
| public class AggregatedQueryLeafProfilerResult { | ||
| private final Thread thread; | ||
| private final Map<String, Long> breakdown; | ||
| private final long startTime; | ||
| private final long totalTime; | ||
|
|
||
| public AggregatedQueryLeafProfilerResult( | ||
| Thread thread, Map<String, Long> breakdown, long startTime, long totalTime) { | ||
| this.thread = thread; | ||
| this.breakdown = | ||
| Collections.unmodifiableMap( | ||
| Objects.requireNonNull(breakdown, "required breakdown argument missing")); | ||
| this.startTime = startTime; | ||
| this.totalTime = totalTime; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the thread that executed query for these leaves | ||
| * | ||
| * @return thread that executed query node | ||
| */ | ||
| public Thread getThread() { | ||
| return thread; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the timing breakdown for this node. | ||
| * | ||
| * @return map containing time breakdown across different operation types | ||
| */ | ||
| public Map<String, Long> getTimeBreakdown() { | ||
| return breakdown; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the start time for this query node execution. | ||
| * | ||
| * @return start time in nanoseconds | ||
| */ | ||
| public long getStartTime() { | ||
| return startTime; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the total time (inclusive of children) for this query node. | ||
| * | ||
| * @return elapsed time in nanoseconds | ||
| */ | ||
| public long getTotalTime() { | ||
| return totalTime; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.lucene.sandbox.search; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.EnumMap; | ||
| import java.util.Map; | ||
| import org.apache.lucene.util.CollectionUtil; | ||
|
|
||
| /** | ||
| * A record of timings for the various operations that may happen during query execution. A node's | ||
| * time may be composed of several internal attributes (rewriting, weighting, scoring, etc). | ||
| */ | ||
| public class QueryLeafProfilerBreakdown { | ||
| private static final Collection<QueryProfilerTimingType> LEAF_LEVEL_TIMING_TYPE = | ||
| Arrays.stream(QueryProfilerTimingType.values()).filter(t -> t.isLeafLevel()).toList(); | ||
|
|
||
| /** The accumulated timings for this query node */ | ||
| private final QueryProfilerTimer[] timers; | ||
|
|
||
| EnumMap<QueryProfilerTimingType, QueryProfilerTimer> map; | ||
|
|
||
| /** Sole constructor. */ | ||
| public QueryLeafProfilerBreakdown() { | ||
| timers = new QueryProfilerTimer[LEAF_LEVEL_TIMING_TYPE.size()]; | ||
| for (int i = 0; i < timers.length; ++i) { | ||
| timers[i] = new QueryProfilerTimer(); | ||
| } | ||
| } | ||
|
|
||
| public QueryProfilerTimer getTimer(QueryProfilerTimingType type) { | ||
| return timers[type.ordinal()]; | ||
| } | ||
|
|
||
| /** Build a timing count breakdown. */ | ||
| public final Map<String, Long> toBreakdownMap() { | ||
| Map<String, Long> map = CollectionUtil.newHashMap(timers.length * 2); | ||
| for (QueryProfilerTimingType type : LEAF_LEVEL_TIMING_TYPE) { | ||
| map.put(type.toString(), timers[type.ordinal()].getApproximateTiming()); | ||
| map.put(type.toString() + "_count", timers[type.ordinal()].getCount()); | ||
| } | ||
| return Collections.unmodifiableMap(map); | ||
| } | ||
|
|
||
| public final AggregatedQueryLeafProfilerResult getLeafProfilerResult(Thread thread) { | ||
| final Map<String, Long> map = CollectionUtil.newHashMap(LEAF_LEVEL_TIMING_TYPE.size() * 2); | ||
| long sliceStartTime = Long.MAX_VALUE; | ||
| long sliceEndTime = Long.MIN_VALUE; | ||
| for (QueryProfilerTimingType type : LEAF_LEVEL_TIMING_TYPE) { | ||
| final QueryProfilerTimer timer = timers[type.ordinal()]; | ||
| // Consider timer for updating start/total time only | ||
| // if it was used | ||
| if (timer.getCount() > 0) { | ||
| sliceStartTime = Math.min(sliceStartTime, timer.getEarliestTimerStartTime()); | ||
| sliceEndTime = | ||
| Math.max( | ||
| sliceEndTime, timer.getEarliestTimerStartTime() + timer.getApproximateTiming()); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't feel right the compute the end time as the start time plus the approximate timing, since operations will often be interleaved. What about reporting the sum of the approximate timings across operation types instead, ie. the value of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! That was an oversight from my side. Will change this |
||
| } | ||
| map.put(type.toString(), timer.getApproximateTiming()); | ||
| map.put(type.toString() + "_count", timer.getCount()); | ||
| } | ||
| return new AggregatedQueryLeafProfilerResult( | ||
| thread, map, sliceStartTime, sliceEndTime - sliceStartTime); | ||
| } | ||
|
|
||
| public final long toTotalTime() { | ||
| long total = 0; | ||
| for (QueryProfilerTimer timer : timers) { | ||
| total += timer.getApproximateTiming(); | ||
| } | ||
| return total; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.lucene.sandbox.search; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
| import java.util.concurrent.ConcurrentHashMap; | ||
| import java.util.concurrent.ConcurrentMap; | ||
|
|
||
| /** QueryLeafProfilerAggregator that aggregates leaf breakdowns at thread level */ | ||
| class QueryLeafProfilerThreadAggregator { | ||
| private final ConcurrentMap<Thread, QueryLeafProfilerBreakdown> queryThreadBreakdowns; | ||
| private long queryStartTime = Long.MAX_VALUE; | ||
| private long queryTotalTime = 0; | ||
|
|
||
| public QueryLeafProfilerThreadAggregator() { | ||
| queryThreadBreakdowns = new ConcurrentHashMap<>(); | ||
| } | ||
|
|
||
| public long getQueryStartTime() { | ||
| return queryStartTime; | ||
| } | ||
|
|
||
| public long getQueryTotalTime() { | ||
| return queryTotalTime; | ||
| } | ||
|
|
||
| private QueryLeafProfilerBreakdown getQueryThreadProfilerBreakdown() { | ||
| final Thread currentThread = Thread.currentThread(); | ||
| // See please https://bugs.openjdk.java.net/browse/JDK-8161372 | ||
| final QueryLeafProfilerBreakdown profilerBreakdown = queryThreadBreakdowns.get(currentThread); | ||
|
|
||
| if (profilerBreakdown != null) { | ||
| return profilerBreakdown; | ||
| } | ||
|
|
||
| return queryThreadBreakdowns.computeIfAbsent( | ||
| currentThread, _ -> new QueryLeafProfilerBreakdown()); | ||
| } | ||
|
|
||
| public QueryProfilerTimer getTimer(QueryProfilerTimingType timingType) { | ||
| assert timingType.isLeafLevel(); | ||
|
|
||
| return getQueryThreadProfilerBreakdown().getTimer(timingType); | ||
| } | ||
|
|
||
| public List<AggregatedQueryLeafProfilerResult> getAggregatedQueryLeafProfilerResults() { | ||
| final List<AggregatedQueryLeafProfilerResult> profilerResults = new ArrayList<>(); | ||
| for (Thread thread : queryThreadBreakdowns.keySet()) { | ||
| final AggregatedQueryLeafProfilerResult aggregatedQueryLeafProfilerResult = | ||
| queryThreadBreakdowns.get(thread).getLeafProfilerResult(thread); | ||
| queryStartTime = Math.min(queryStartTime, aggregatedQueryLeafProfilerResult.getStartTime()); | ||
| queryTotalTime += aggregatedQueryLeafProfilerResult.getTotalTime(); | ||
| profilerResults.add(aggregatedQueryLeafProfilerResult); | ||
| } | ||
|
|
||
| return profilerResults; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,46 +17,89 @@ | |
|
|
||
| package org.apache.lucene.sandbox.search; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import org.apache.lucene.search.Query; | ||
| import org.apache.lucene.util.CollectionUtil; | ||
|
|
||
| /** | ||
| * A record of timings for the various operations that may happen during query execution. A node's | ||
| * time may be composed of several internal attributes (rewriting, weighting, scoring, etc). | ||
| */ | ||
| class QueryProfilerBreakdown { | ||
| private static final Collection<QueryProfilerTimingType> QUERY_LEVEL_TIMING_TYPE = | ||
| Arrays.stream(QueryProfilerTimingType.values()).filter(t -> !t.isLeafLevel()).toList(); | ||
| private final Map<QueryProfilerTimingType, QueryProfilerTimer> queryProfilerTimers; | ||
|
|
||
| /** The accumulated timings for this query node */ | ||
| private final QueryProfilerTimer[] timers; | ||
| private final QueryLeafProfilerThreadAggregator queryLeafProfilerAggregator; | ||
|
|
||
| /** Sole constructor. */ | ||
| public QueryProfilerBreakdown() { | ||
| timers = new QueryProfilerTimer[QueryProfilerTimingType.values().length]; | ||
| for (int i = 0; i < timers.length; ++i) { | ||
| timers[i] = new QueryProfilerTimer(); | ||
| queryProfilerTimers = new HashMap<>(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or use an EnumMap? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @jpountz. This is a good suggestion, and I missed that while making the changes. But when I started using it, I was curious about the implementation and saw that it initializes reference for all the Enum values during initialization: Since we are using small subset of |
||
| this.queryLeafProfilerAggregator = new QueryLeafProfilerThreadAggregator(); | ||
|
|
||
| for (QueryProfilerTimingType timingType : QUERY_LEVEL_TIMING_TYPE) { | ||
| queryProfilerTimers.put(timingType, new QueryProfilerTimer()); | ||
| } | ||
| } | ||
|
|
||
| public QueryProfilerTimer getTimer(QueryProfilerTimingType type) { | ||
| return timers[type.ordinal()]; | ||
| public final QueryProfilerTimer getTimer(QueryProfilerTimingType timingType) { | ||
| if (timingType.isLeafLevel()) { | ||
| return queryLeafProfilerAggregator.getTimer(timingType); | ||
| } | ||
|
|
||
| // Return the query level profiler timer if not | ||
| // slice level | ||
| return queryProfilerTimers.get(timingType); | ||
| } | ||
|
|
||
| /** Build a timing count breakdown. */ | ||
| public final Map<String, Long> toBreakdownMap() { | ||
| Map<String, Long> map = CollectionUtil.newHashMap(timers.length * 2); | ||
| for (QueryProfilerTimingType type : QueryProfilerTimingType.values()) { | ||
| map.put(type.toString(), timers[type.ordinal()].getApproximateTiming()); | ||
| map.put(type.toString() + "_count", timers[type.ordinal()].getCount()); | ||
| public final QueryProfilerResult getQueryProfilerResult( | ||
| Query query, List<QueryProfilerResult> childrenProfileResults) { | ||
| long queryStartTime = Long.MAX_VALUE; | ||
| long queryTotalTime = 0; | ||
| final Map<String, Long> breakdownMap = | ||
| CollectionUtil.newHashMap(QUERY_LEVEL_TIMING_TYPE.size() * 2); | ||
| for (QueryProfilerTimingType type : QUERY_LEVEL_TIMING_TYPE) { | ||
| final QueryProfilerTimer timer = queryProfilerTimers.get(type); | ||
| if (timer.getCount() > 0) { | ||
| queryStartTime = Math.min(queryStartTime, timer.getEarliestTimerStartTime()); | ||
| queryTotalTime += timer.getApproximateTiming(); | ||
| } | ||
| // TODO: Should we put only non-zero timer values in the final output? | ||
| breakdownMap.put(type.toString(), queryProfilerTimers.get(type).getApproximateTiming()); | ||
| breakdownMap.put(type.toString() + "_count", queryProfilerTimers.get(type).getCount()); | ||
| } | ||
| return Collections.unmodifiableMap(map); | ||
|
|
||
| final List<AggregatedQueryLeafProfilerResult> threadProfilerResults = | ||
| queryLeafProfilerAggregator.getAggregatedQueryLeafProfilerResults(); | ||
| queryStartTime = Math.min(queryStartTime, queryLeafProfilerAggregator.getQueryStartTime()); | ||
| queryTotalTime += queryLeafProfilerAggregator.getQueryTotalTime(); | ||
|
|
||
| return new QueryProfilerResult( | ||
| getTypeFromQuery(query), | ||
| getDescriptionFromQuery(query), | ||
| Collections.unmodifiableMap(breakdownMap), | ||
| threadProfilerResults, | ||
| childrenProfileResults, | ||
| queryStartTime, | ||
| queryTotalTime); | ||
| } | ||
|
|
||
| public final long toTotalTime() { | ||
| long total = 0; | ||
| for (QueryProfilerTimer timer : timers) { | ||
| total += timer.getApproximateTiming(); | ||
| private String getTypeFromQuery(Query query) { | ||
| // Anonymous classes won't have a name, | ||
| // we need to get the super class | ||
| if (query.getClass().getSimpleName().isEmpty()) { | ||
| return query.getClass().getSuperclass().getSimpleName(); | ||
| } | ||
| return total; | ||
| return query.getClass().getSimpleName(); | ||
| } | ||
|
|
||
| private String getDescriptionFromQuery(Query query) { | ||
| return query.toString(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about using an
EnumMap, which will be implemented by an array like this one under the hood?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although minor, but
EnumMapwill initialize the timers for all the possible keysvals = new Object[keyUniverse.length];. It seemed bit unnecessary to me, but we can change if usingEnumMapimproves the readability.