Skip to content

Commit

Permalink
Add documentation and other MR requests
Browse files Browse the repository at this point in the history
  • Loading branch information
billoley committed Oct 2, 2023
1 parent 7b84cf6 commit 65f7862
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@
import datawave.query.iterator.ResultCountingIterator;
import datawave.query.iterator.profile.QuerySpan;

// This class maintains common state and logic to determine if the QueryIterator's stack of boolean logic
// should yield to ensure that resources are shared appropriately, to return collected metrics if configured,
// and to ensure that there is still a client waiting for a response.
/*
* This class maintains common state and logic to determine if the QueryIterator's stack of boolean logic
* should yield to ensure that resources are shared appropriately, to return collected metrics if configured,
* and to ensure that there is still a client waiting for a response.
*/
public class WaitWindowObserver {

private static final Logger log = Logger.getLogger(WaitWindowObserver.class);
Expand Down Expand Up @@ -81,8 +83,10 @@ public WaitWindowObserver() {

}

// Using the WaitWindowTask in a Timer will limit the number of times that System.currentTimeMillis()
// is called while still decrementing remainingTimeMs as appropriate to enable yields.
/*
* Using the WaitWindowTimerTask in a Timer will limit the number of times that System.currentTimeMillis() is called while still decrementing remainingTimeMs as
* appropriate to enable yields.
*/
private class WaitWindowTimerTask extends TimerTask {
@Override
public void run() {
Expand All @@ -95,9 +99,10 @@ public void run() {
}
}

// Ensure that we are only creating one Timer object per JVM (tablet server) for
// scheduling WaitWindowTimerTasks. Use the double null check to limit synchronization
// and prevent a race condition that overwrites WaitWindowObserver.timer.
/*
* Ensure that we are only creating one Timer object per JVM (tablet server) for scheduling WaitWindowTimerTasks. Use the double null check to limit
* synchronization and prevent a race condition that overwrites WaitWindowObserver.timer.
*/
private static Timer getTimer() {
if (WaitWindowObserver.timer == null) {
synchronized (WaitWindowObserver.class) {
Expand All @@ -109,19 +114,27 @@ private static Timer getTimer() {
return WaitWindowObserver.timer;
}

/*
 * Begin a wait window: store the supplied seekRange, reset remainingTimeMs to the full yieldThresholdMs, record endOfWaitWindow as the current time plus
 * yieldThresholdMs, and schedule this observer's timerTask on the Timer returned by getTimer(), using checkPeriod as both the initial delay and the
 * repeat period.
 */
public void start(Range seekRange, long yieldThresholdMs) {
    this.seekRange = seekRange;
    remainingTimeMs.set(yieldThresholdMs);
    final long now = System.currentTimeMillis();
    endOfWaitWindow = now + yieldThresholdMs;
    getTimer().schedule(timerTask, checkPeriod, checkPeriod);
}

// Ensure that the WaitWindowTimerTask is cancelled. Called from QueryIterator.hasTop.
/*
 * Cancel this observer's WaitWindowTimerTask. Called from QueryIterator.hasTop.
 */
public void stop() {
    timerTask.cancel();
}

// Called from waitWindowOverrun() and from places that use a timeout for polling or retrieving Future results
/*
* Called from waitWindowOverrun() and from places that use a timeout for polling or retrieving Future results. remainingTimeMs gets updated periodically in
* another thread.
*/
public long remainingTimeMs() {
if (this.yieldCallback == null) {
return Long.MAX_VALUE;
Expand All @@ -130,10 +143,16 @@ public long remainingTimeMs() {
}
}

/*
 * Report whether the wait window has been used up, i.e. remainingTimeMs has reached zero or gone negative. remainingTimeMs gets updated periodically in
 * another thread.
 */
public boolean waitWindowOverrun() {
    final long remaining = remainingTimeMs.get();
    return remaining <= 0;
}

/*
* If we have exceeded the wait window, then immediately throw a WaitWindowOverrunException containing a yield key corresponding to the provided key.
*/
public void checkWaitWindow(Key currentKey, boolean yieldToBeginning) {
if (this.yieldCallback != null && waitWindowOverrun()) {
Key currentYieldKey = createYieldKey(currentKey, yieldToBeginning);
Expand All @@ -145,9 +164,10 @@ public void checkWaitWindow(Key currentKey, boolean yieldToBeginning) {
}
}

// There can be many embedded AndIterators, OrIterators, and Ivarators where a WaitWindowOverrunException
// can be thrown from. As the exception makes its way to the top of the call chain, we need to evaluate
// the yieldKey at each level.
/*
* There can be many embedded AndIterators, OrIterators, and Ivarators where a WaitWindowOverrunException can be thrown from. As the exception makes its way
* to the top of the call chain, we need to evaluate the yieldKey at each level.
*/
public void propagateException(Key key, boolean yieldToBeginning, boolean keepLowest, WaitWindowOverrunException e) {
Key yieldKey;
if (key == null) {
Expand All @@ -163,8 +183,10 @@ public void propagateException(Key key, boolean yieldToBeginning, boolean keepLo
throw new WaitWindowOverrunException(yieldKey);
}

// When yieldKey is set, yield on the second call from QueryIterator.hasTop so that the first call can return the
// document that contains the WAIT_WINDOW_OVERRUN and TIMING_METADATA attributes
/*
* When yieldKey is set and collectTimingDetails=true, then we yield on the second call from QueryIterator.hasTop so that the first call can return the
* document that contains the WAIT_WINDOW_OVERRUN and TIMING_METADATA attributes. readyToYield will be set to true after the first call to yieldOnOverrun
*/
public void yieldOnOverrun() {
if (this.yieldCallback != null && this.yieldKey != null && !this.yieldCallback.hasYielded()) {
if (readyToYield) {
Expand All @@ -181,7 +203,9 @@ public void yieldOnOverrun() {
}
}

// Create a yield key with YIELD_AT_BEGIN or YIELD_AT_END marker
/*
* Create a yield key with YIELD_AT_BEGIN or YIELD_AT_END marker
*/
public Key createYieldKey(Key yieldKey, boolean yieldToBeginning) {
if (isShardKey(yieldKey)) {
return createShardYieldKey(yieldKey, yieldToBeginning);
Expand All @@ -190,20 +214,32 @@ public Key createYieldKey(Key yieldKey, boolean yieldToBeginning) {
}
}

// Create a key that sorts either before or after all field keys for this document key.
// A colQual starting with ! sorts before all keys whose colFam starts with an alphanumeric character.
// A colQual starting with \uffff sorts after all keys whose colFam starts with an alphanumeric character.
// We are adding sort-irrelevant marker text after that symbol to easily identify the key
/*
* Create a key that sorts either before or after all field keys for this document key. A colQual starting with ! sorts before all keys whose colFam starts
* with an alphanumeric character. A colQual starting with \uffff sorts after all keys whose colFam starts with an alphanumeric character. We are adding
* sort-irrelevant marker text after that symbol to easily identify the key
*
* YYYYMMDD_NN !YIELD_AT_BEGIN YYYYMMDD_NN \uffffYIELD_AT_END
*
* ensureYieldKeyAfterRangeStart may add a \x00 after the colFam if the produced key matches the startKey of a non-inclusive seekRange
*/
public Key createShardYieldKey(Key key, boolean yieldToBeginning) {
    // a key that already carries YIELD_AT_END must keep yielding to the end, whatever the caller asked for
    final boolean useBeginMarker = yieldToBeginning && !hasEndMarker(key);
    final Text marker = useBeginMarker ? YIELD_AT_BEGIN : YIELD_AT_END;
    // the marker becomes the colFam of a key in the same row
    final Key shardYieldKey = new Key(key.getRow(), marker);
    return ensureYieldKeyAfterRangeStart(shardYieldKey);
}

// Create a key that sorts either before or after all field keys for this document key.
// A colQual starting with ! sorts before all keys with the same row/colFam and an alphanumeric colQual
// A colQual starting with \uffff sorts after all keys with the same row/colFam and an alphanumeric colQual
// Also adding a sort-irrelevant marker text after that symbol to easily identify the key
/*
* Create a key that sorts either before or after all field keys for this document key. A colQual starting with ! sorts before all keys with the same
* row/colFam and an alphanumeric colQual. A colQual starting with \uffff sorts after all keys with the same row/colFam and an alphanumeric colQual. Also
* adding a sort-irrelevant marker text after that symbol to easily identify the key.
*
* sortedUIDs YYYYMMDD_NN datatype\x00uid:!YIELD_AT_BEGIN YYYYMMDD_NN datatype\x00uid:\uffffYIELD_AT_END
*
* !sortedUIDs YYYYMMDD_NN datatype\x00uid:!YIELD_AT_BEGIN\x00field\x00value YYYYMMDD_NN datatype\x00uid:field\x00value\uffffYIELD_AT_END
*
* ensureYieldKeyAfterRangeStart may add a \x00 after the colQual if the produced key matches the startKey of a non-inclusive seekRange
*/
public Key createDocumentYieldKey(Key key, boolean yieldToBeginning) {
// if key already contains YIELD_AT_END then we must yield to the end
Text marker = yieldToBeginning && !hasEndMarker(key) ? YIELD_AT_BEGIN : YIELD_AT_END;
Expand All @@ -220,18 +256,19 @@ public Key createDocumentYieldKey(Key key, boolean yieldToBeginning) {
if (origColQual.isEmpty()) {
colQual = marker;
} else if (yieldToBeginning) {
colQual = new Text(marker.toString() + "\0" + origColQual);
colQual = new Text(marker + "\0" + origColQual);
} else {
colQual = new Text(origColQual + marker.toString());
colQual = new Text(origColQual + marker);
}
}
newKey = new Key(key.getRow(), key.getColumnFamily(), colQual);
}
return ensureYieldKeyAfterRangeStart(newKey);
}

// When the current seekRange is non-inclusive, we can not return the startKey of the range
// as a yield key. Instead, we have to return the following key.
/*
* When the current seekRange is non-inclusive, we can not return the startKey of the range as a yield key. Instead, we have to return the following key.
*/
private Key ensureYieldKeyAfterRangeStart(Key key) {
if (!this.seekRange.isStartKeyInclusive()) {
Key seekStartKey = this.seekRange.getStartKey();
Expand All @@ -248,6 +285,12 @@ private Key ensureYieldKeyAfterRangeStart(Key key) {
return key;
}

/*
* When yieldKey is set and collectTimingDetails=true, then we yield on the second call from QueryIterator.hasTop so that the first call can return the
* document that contains the WAIT_WINDOW_OVERRUN and TIMING_METADATA attributes. Since we can not yield on the same key that was just returned, we have to
* yield on a key that follows the yieldKey. For YIELD_AT_END, we add a null char after YIELD_AT_END which is at the end of the colFam or colQual. For
* YIELD_AT_BEGIN, we add a null character after that marker.
*/
private Key yieldKeyAfterOverrun(Key key) {
Text row = key.getRow();
Text colFam = key.getColumnFamily();
Expand Down Expand Up @@ -277,6 +320,10 @@ public void setYieldKey(Key yieldKey) {
this.yieldKey = yieldKeyAfterOverrun(yieldKey);
}

/*
 * True only when a yieldKey has been set and readyToYield is true. When yieldKey is set and collectTimingDetails=true, the yield happens on the second
 * call from QueryIterator.hasTop so that the first call can return the document that contains the WAIT_WINDOW_OVERRUN and TIMING_METADATA attributes;
 * readyToYield will be set to true after the first call to yieldOnOverrun.
 */
public boolean isReadyToYield() {
    if (yieldKey == null) {
        return false;
    }
    return readyToYield;
}
Expand All @@ -285,31 +332,54 @@ public Key getYieldKey() {
return yieldKey;
}

/*
 * YIELD_AT_BEGIN and YIELD_AT_END markers for Document keys are in the colQual, so if the colFam is empty or contains one of these markers, then it is a
 * shard key.
 *
 * Both markers are checked: createShardYieldKey places either YIELD_AT_BEGIN or YIELD_AT_END in the colFam, so testing only for the begin marker would
 * fail to recognise a shard key that yields at the end.
 */
static public boolean isShardKey(Key key) {
    Text colFam = key.getColumnFamily();
    // an empty colFam means the key addresses the shard (row) itself
    return colFam.getLength() == 0 || hasMarker(colFam);
}

/*
 * Check whether the key carries a yield marker (YIELD_AT_BEGIN or YIELD_AT_END) in either its colFam or its colQual
 */
static public boolean hasMarker(Key key) {
    if (hasBeginMarker(key)) {
        return true;
    }
    return hasEndMarker(key);
}

/*
 * Check whether this Text carries a yield marker (YIELD_AT_BEGIN or YIELD_AT_END)
 */
static public boolean hasMarker(Text text) {
    if (hasBeginMarker(text)) {
        return true;
    }
    return hasEndMarker(text);
}

/*
 * Check whether either the colFam or the colQual of the key carries the YIELD_AT_BEGIN marker, as decided by hasBeginMarker(Text)
 */
static public boolean hasBeginMarker(Key key) {
    final Text colFam = key.getColumnFamily();
    if (hasBeginMarker(colFam)) {
        return true;
    }
    // only look at the colQual when the colFam did not match
    final Text colQual = key.getColumnQualifier();
    return hasBeginMarker(colQual);
}

/*
* YIELD_AT_BEGIN will always be at the beginning of the Text
*/
static public boolean hasBeginMarker(Text text) {
return text.toString().contains(YIELD_AT_BEGIN_STR);
return text.toString().startsWith(YIELD_AT_BEGIN_STR);
}

/*
 * Check whether either the colFam or the colQual of the key carries the YIELD_AT_END marker, as decided by hasEndMarker(Text)
 */
static public boolean hasEndMarker(Key key) {
    final Text colFam = key.getColumnFamily();
    if (hasEndMarker(colFam)) {
        return true;
    }
    // only look at the colQual when the colFam did not match
    final Text colQual = key.getColumnQualifier();
    return hasEndMarker(colQual);
}

/*
 * Check if YIELD_AT_END appears anywhere in the Text. One or more null characters can get added to the end of the colFam or colQual after the marker,
 * so an endsWith check would miss those cases.
 */
static public boolean hasEndMarker(Text text) {
    final String value = text.toString();
    return value.contains(YIELD_AT_END_STR);
}
Expand Down Expand Up @@ -339,18 +409,28 @@ static public Text removeMarkers(Text text) {
}
}

/*
 * Convenience method to produce a document containing a WAIT_WINDOW_OVERRUN attribute. This document gets returned before a yield when
 * collectTimingDetails=true so that the FinalDocumentTrackingIterator can add timing details and metrics before returning the Document.
 */
static public Document getWaitWindowOverrunDocument() {
    final Document overrunDocument = new Document();
    final WaitWindowExceededMetadata overrunAttribute = new WaitWindowExceededMetadata();
    overrunDocument.put(WAIT_WINDOW_OVERRUN, overrunAttribute);
    return overrunDocument;
}

/*
* Return a yieldKey for the lowest key in a collection while handling the special case of !sortedUIDs
*/
public Key lowestYieldKey(Collection<Key> keys) {
Collection<Key> keySet = new HashSet<>(keys);
Text lowestRow = keySet.stream().sorted(keyComparator).findFirst().get().getRow();
List<Key> keysInRowSortedIncreasing = keySet.stream().filter(k -> k.getRow().equals(lowestRow)).sorted(keyComparator).collect(Collectors.toList());
Key lowestKey = keysInRowSortedIncreasing.stream().findFirst().get();
if (keySet.size() > 1 && !sortedUIDs) {
// if !sortedUIDs, then the yieldKeys could have field names and values in the colQual. If the lowestKey has
// a non-empty colQual (after removing any marker) then the yieldKey will have to be constructed from the lowest
// colFam AND the lowest colQual even if those are not from the same original key.
if (keysInRowSortedIncreasing.size() > 1 && !sortedUIDs) {
Optional<String> lowestColQual = keysInRowSortedIncreasing.stream().map(k -> WaitWindowObserver.removeMarkers(k.getColumnQualifier()).toString())
.filter(Predicate.not(String::isEmpty)).sorted().findFirst();
if (removeMarkers(lowestKey.getColumnQualifier()).getLength() > 0 && lowestColQual.isPresent()) {
Expand All @@ -361,19 +441,26 @@ public Key lowestYieldKey(Collection<Key> keys) {
lowestKey = lowestKeyWithLowestColQual;
} else {
Text colQual = lowestKeyWithLowestColQual.getColumnQualifier();
lowestKey = createYieldKey(new Key(lowestKey.getRow(), lowestKey.getColumnFamily(), colQual), hasBeginMarker(lowestKey));
lowestKey = new Key(lowestKey.getRow(), lowestKey.getColumnFamily(), colQual);
}
}
}
return lowestKey;
// default to YIELD_AT_BEGIN unless the key already has a YIELD_AT_END marker
return createYieldKey(lowestKey, !hasEndMarker(lowestKey));
}

/*
* Return a yieldKey for the highest key in a collection while handling the special case of !sortedUIDs
*/
public Key highestYieldKey(Collection<Key> keys) {
Collection<Key> keySet = new HashSet<>(keys);
Text higestRow = keySet.stream().sorted(keyComparatorReversed).findFirst().get().getRow();
List<Key> keysInRowSortedDecreasing = keySet.stream().filter(k -> k.getRow().equals(higestRow)).sorted(keyComparatorReversed)
.collect(Collectors.toList());
Key highestKey = keysInRowSortedDecreasing.stream().findFirst().get();
// if !sortedUIDs, then the yieldKeys could have field names and values in the colQual. If the highestKey has
// a non-empty colQual (after removing any marker) then the yieldKey will have to be constructed from the
// highest colFam AND the highest colQual even if those are not from the same original key.
if (keysInRowSortedDecreasing.size() > 1 && !sortedUIDs) {
Optional<String> highestColQual = keysInRowSortedDecreasing.stream().map(k -> WaitWindowObserver.removeMarkers(k.getColumnQualifier()).toString())
.filter(Predicate.not(String::isEmpty)).sorted(Comparator.reverseOrder()).findFirst();
Expand All @@ -387,12 +474,13 @@ public Key highestYieldKey(Collection<Key> keys) {
highestKey = highestKeyWithHighestColQual;
} else {
Text colQual = highestKeyWithHighestColQual.getColumnQualifier();
highestKey = createYieldKey(new Key(highestKey.getRow(), highestKey.getColumnFamily(), colQual), hasBeginMarker(highestKey));
highestKey = new Key(highestKey.getRow(), highestKey.getColumnFamily(), colQual);
}
}
}
}
return highestKey;
// default to YIELD_AT_BEGIN unless the key already has a YIELD_AT_END marker
return createYieldKey(highestKey, !hasEndMarker(highestKey));
}

public void setResultCountingIterator(ResultCountingIterator resultCountingIterator) {
Expand Down
Loading

0 comments on commit 65f7862

Please sign in to comment.