-
Notifications
You must be signed in to change notification settings - Fork 4.6k
[Dataflow Streaming] [Multi Key] Introduce KeyGroupWorkQueue and integrate with BoundedQueueExecutor #38767
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
[Dataflow Streaming] [Multi Key] Introduce KeyGroupWorkQueue and integrate with BoundedQueueExecutor #38767
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ | |
| import java.util.IntSummaryStatistics; | ||
| import java.util.Map; | ||
| import java.util.Map.Entry; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.function.Consumer; | ||
|
|
@@ -52,6 +53,7 @@ | |
| import org.apache.beam.sdk.annotations.Internal; | ||
| import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; | ||
| import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; | ||
| import org.checkerframework.checker.nullness.qual.Nullable; | ||
| import org.joda.time.Duration; | ||
| import org.joda.time.Instant; | ||
|
|
||
|
|
@@ -74,6 +76,7 @@ public final class Work implements RefreshableWork { | |
| private final Instant startTime; | ||
| private final Map<LatencyAttribution.State, Duration> totalDurationPerState; | ||
| private final WorkId id; | ||
| private final Optional<KeyGroup> keyGroup; | ||
| private final String latencyTrackingId; | ||
| private final long serializedWorkItemSize; | ||
| private volatile TimedState currentState; | ||
|
|
@@ -101,6 +104,11 @@ private Work( | |
| // keyUniverse inside EnumMap every time. | ||
| this.totalDurationPerState = new EnumMap<>(EMPTY_ENUM_MAP); | ||
| this.id = WorkId.of(workItem); | ||
| this.keyGroup = | ||
| workItem.hasKeyGroup() | ||
| ? Optional.of( | ||
| KeyGroup.create(workItem.getKeyGroup().getHigh(), workItem.getKeyGroup().getLow())) | ||
| : Optional.empty(); | ||
| this.latencyTrackingId = | ||
| Long.toHexString(workItem.getShardingKey()) | ||
| + '-' | ||
|
|
@@ -383,6 +391,14 @@ private boolean isCommitPending() { | |
| abstract Instant startTime(); | ||
| } | ||
|
|
||
| public String getComputationId() { | ||
| return processingContext.computationId(); | ||
| } | ||
|
|
||
| public Optional<KeyGroup> getKeyGroup() { | ||
| return keyGroup; | ||
| } | ||
|
|
||
| @AutoValue | ||
| public abstract static class ProcessingContext { | ||
|
|
||
|
|
@@ -416,4 +432,48 @@ private Optional<KeyedGetDataResponse> fetchKeyedState(KeyedGetDataRequest reque | |
| return Optional.ofNullable(getDataClient().getStateData(computationId(), request)); | ||
| } | ||
| } | ||
|
|
||
| public static final class KeyGroup { | ||
| private final long high; | ||
| private final long low; | ||
|
|
||
| private KeyGroup(long high, long low) { | ||
| this.high = high; | ||
| this.low = low; | ||
| } | ||
|
|
||
| public static KeyGroup create(long high, long low) { | ||
| return new KeyGroup(high, low); | ||
| } | ||
|
|
||
| public long high() { | ||
| return high; | ||
| } | ||
|
|
||
| public long low() { | ||
| return low; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean equals(@Nullable Object o) { | ||
| if (this == o) { | ||
| return true; | ||
| } | ||
| if (!(o instanceof KeyGroup)) { | ||
| return false; | ||
| } | ||
| KeyGroup other = (KeyGroup) o; | ||
| return high == other.high && low == other.low; | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() { | ||
| return Objects.hash(high, low); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "KeyGroup{" + "high=" + high + ", low=" + low + '}'; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. format as padded-hex, then it's easier to see whole # by concatenating |
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull; | ||
| import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; | ||
|
|
||
| import java.util.Optional; | ||
| import java.util.concurrent.ConcurrentLinkedQueue; | ||
| import java.util.concurrent.LinkedBlockingQueue; | ||
| import java.util.concurrent.ThreadFactory; | ||
|
|
@@ -29,6 +30,7 @@ | |
| import javax.annotation.concurrent.GuardedBy; | ||
| import org.apache.beam.runners.dataflow.worker.streaming.BoundedQueueExecutorWorkHandle; | ||
| import org.apache.beam.runners.dataflow.worker.streaming.ExecutableWork; | ||
| import org.apache.beam.runners.dataflow.worker.streaming.Work; | ||
| import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; | ||
| import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; | ||
| import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.Monitor; | ||
|
|
@@ -85,7 +87,8 @@ public BoundedQueueExecutor( | |
| int maximumElementsOutstanding, | ||
| long maximumBytesOutstanding, | ||
| ThreadFactory threadFactory, | ||
| boolean useFairMonitor) { | ||
| boolean useFairMonitor, | ||
| boolean useKeyGroupWorkQueue) { | ||
| this.maximumPoolSize = initialMaximumPoolSize; | ||
| monitor = new Monitor(useFairMonitor); | ||
| executor = | ||
|
|
@@ -94,7 +97,7 @@ public BoundedQueueExecutor( | |
| initialMaximumPoolSize, | ||
| keepAliveTime, | ||
| unit, | ||
| new LinkedBlockingQueue<>(), | ||
| useKeyGroupWorkQueue ? new KeyGroupWorkQueue() : new LinkedBlockingQueue<>(), | ||
| threadFactory) { | ||
| @Override | ||
| protected void beforeExecute(Thread t, Runnable r) { | ||
|
|
@@ -313,7 +316,7 @@ public synchronized void close() { | |
| } | ||
| } | ||
|
|
||
| private static final class QueuedWork implements Runnable { | ||
| static final class QueuedWork implements Runnable { | ||
|
|
||
| private final ExecutableWork work; | ||
| private final BoundedQueueExecutorWorkHandleImpl handle; | ||
|
|
@@ -378,6 +381,23 @@ BoundedQueueExecutorWorkHandleImpl createBudgetHandle(int elements, long bytes) | |
| return new BoundedQueueExecutorWorkHandleImpl(elements, bytes); | ||
| } | ||
|
|
||
| /** Poll work for a specific computationId and keyGroup. */ | ||
| public Optional<ExecutableWork> pollWork( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we just use nullable instead of optional? since it's somewhat internal API, removing it simplifies allocations and we can enable nullness checker to ensure it is used safely |
||
| String computationId, Work.KeyGroup keyGroup, BoundedQueueExecutorWorkHandle handle) { | ||
| checkArgument(handle instanceof BoundedQueueExecutorWorkHandleImpl); | ||
| BoundedQueueExecutorWorkHandleImpl internalHandle = (BoundedQueueExecutorWorkHandleImpl) handle; | ||
| if (!(executor.getQueue() instanceof KeyGroupWorkQueue)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of instanceof+cast, how about a nullable KeyGroupWorkQueue member variable set if that is the backing queue |
||
| return Optional.empty(); | ||
| } | ||
| QueuedWork queuedWork = | ||
| ((KeyGroupWorkQueue) executor.getQueue()).pollWork(computationId, keyGroup); | ||
| if (queuedWork == null) { | ||
| return Optional.empty(); | ||
| } | ||
| internalHandle.merge(queuedWork.getHandle()); | ||
| return Optional.of(queuedWork.getWork()); | ||
| } | ||
|
|
||
| private void decrementCounters(int elements, long bytes) { | ||
| // All threads queue decrements and one thread grabs the monitor and updates | ||
| // counters. We do this to reduce contention on monitor which is locked by | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
instead of optional what about a KeyGroup.DEFAULT instance?