Skip to content

Commit

Permalink
refact: refactor TMModel.TokenizerThread and add debug logging
Browse files Browse the repository at this point in the history
  • Loading branch information
sebthom committed Jun 17, 2023
1 parent 9bb82e6 commit 83d01a7
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
*/
package org.eclipse.tm4e.core.model;

import static java.lang.System.Logger.Level.ERROR;
import static java.lang.System.Logger.Level.*;
import static org.eclipse.tm4e.core.internal.utils.MoreCollections.findLastElement;
import static org.eclipse.tm4e.core.internal.utils.NullSafetyHelper.castNonNull;

Expand All @@ -23,7 +23,6 @@
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.function.IntConsumer;

import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tm4e.core.grammar.IGrammar;
Expand Down Expand Up @@ -59,6 +58,18 @@ public TMModel(final AbstractModelLines lines) {
invalidateLine(0);
}

private static final boolean DEBUG_LOGGING = LOGGER.isLoggable(DEBUG);

private void logDebug(final String msg, final Object... args) {
if (!DEBUG_LOGGING)
return;
final var t = Thread.currentThread();
final var caller = t.getStackTrace()[2];
final var threadName = t.getName().endsWith(TokenizerThread.class.getSimpleName()) ? "tknz" : t.getName();
LOGGER.log(DEBUG, "[" + threadName + "] " + getClass().getSimpleName() + "." + caller.getMethodName() +
String.format(msg, args));
}

/**
* The {@link TokenizerThread} continuously runs tokenizing in background on the lines found in
* {@link TMModel#modelLines}.
Expand All @@ -81,9 +92,10 @@ private final class TokenizerThread extends Thread {

@Override
public void run() {
while (tokenizerThread == this && !isInterrupted()) {
try {
tokenizerThreadIsWorking = false;
try {
while (tokenizerThread == this) {
tokenizerThreadIsWorking = !invalidLines.isEmpty();

final int lineIndexToProcess = invalidLines.take();
tokenizerThreadIsWorking = true;

Expand All @@ -98,9 +110,9 @@ public void run() {
LOGGER.log(ERROR, ex.getMessage(), ex);
invalidateLine(lineIndexToProcess);
}
} catch (final InterruptedException e) {
interrupt();
}
} catch (final InterruptedException e) {
interrupt();
}
tokenizerThreadIsWorking = false;
}
Expand All @@ -114,102 +126,85 @@ public void run() {
* @param startLineIndex 0-based
*/
private void revalidateTokens(final int startLineIndex) {
if (DEBUG_LOGGING)
logDebug("(%d)", startLineIndex);

final var changedRanges = new ArrayList<Range>();
final IntConsumer lineChangedListener = lineNumber -> {
final Range previousRange = findLastElement(changedRanges);
if (previousRange != null && previousRange.toLineNumber == lineNumber - 1) {
previousRange.toLineNumber++; // extend previous range
} else {
changedRanges.add(new Range(lineNumber)); // insert new range
}
};

int lineIndex = startLineIndex;
final long startTime = System.currentTimeMillis();

revalidateTokens_Loop: {
while (lineIndex < modelLines.getNumberOfLines()) {
switch (updateTokensOfLine(lineChangedListener, lineIndex, MAX_TIME_PER_LINE)) {
case DONE:
break revalidateTokens_Loop;
case UPDATE_FAILED:
// mark the current line as invalid and add it to the end of the queue
invalidateLine(lineIndex);
break revalidateTokens_Loop;
case NEXT_LINE_IS_OUTDATED:
if (System.currentTimeMillis() - startTime >= MAX_LOOP_TIME) {
// mark the next line as invalid and add it to the end of the queue
invalidateLine(lineIndex + 1);
break revalidateTokens_Loop;
}
lineIndex++;
break;
}
}
}

if (!changedRanges.isEmpty()) {
emit(new ModelTokensChangedEvent(changedRanges, TMModel.this));
}
}

private enum UpdateTokensOfLineResult {
DONE,
UPDATE_FAILED,
NEXT_LINE_IS_OUTDATED,
}
while (true) {

/**
* @param lineIndex 0-based
*/
private UpdateTokensOfLineResult updateTokensOfLine(final IntConsumer lineChangedListener, final int lineIndex,
final Duration timeLimit) {

final var modelLine = modelLines.getOrNull(lineIndex);
if (modelLine == null) {
return UpdateTokensOfLineResult.DONE; // line does not exist anymore
}

/*
* (re-)tokenize the requested line
*/
final TokenizationResult r;
final String lineText;
try {
lineText = modelLines.getLineText(lineIndex);
r = castNonNull(tokenizer).tokenize(lineText, modelLine.startState, 0, timeLimit);
} catch (final Exception ex) {
LOGGER.log(ERROR, ex.toString());
return UpdateTokensOfLineResult.UPDATE_FAILED;
}
final var modelLine = modelLines.getOrNull(lineIndex);
if (modelLine == null) {
if (DEBUG_LOGGING)
logDebug("(%d) >> DONE - line %d does not exist anymore", startLineIndex, lineIndex);
break revalidateTokens_Loop; // EXIT: line does not exist anymore
}

if (r.stoppedEarly) {
// treat the rest of the line as one default token
r.tokens.add(new TMToken(r.actualStopOffset, ""));
// Use the line's starting state as end state in case of incomplete tokenization
r.endState = modelLine.startState;
}
// (re-)tokenize the line
if (DEBUG_LOGGING)
logDebug("(%d) >> tokenizing line %d...", startLineIndex, lineIndex);
final TokenizationResult r;
try {
final String lineText = modelLines.getLineText(lineIndex);
r = castNonNull(tokenizer).tokenize(lineText, modelLine.startState, 0, MAX_TIME_PER_LINE);
} catch (final Exception ex) {
LOGGER.log(ERROR, ex.toString());
// mark the current line as invalid and add it to the end of the queue
invalidateLine(lineIndex);
break revalidateTokens_Loop; // EXIT: error occurred
}

modelLine.tokens = r.tokens;
lineChangedListener.accept(lineIndex + 1);
modelLine.isInvalid = false;
// check if complete line was tokenized
if (r.stoppedEarly) {
// treat the rest of the line as one default token
r.tokens.add(new TMToken(r.actualStopOffset, ""));
// Use the line's starting state as end state in case of incomplete tokenization
r.endState = modelLine.startState;
}
modelLine.tokens = r.tokens;

// add the line number to the changed ranges
final int lineNumber = lineIndex + 1;
final Range previousRange = findLastElement(changedRanges);
if (previousRange != null && previousRange.toLineNumber == lineNumber - 1) {
previousRange.toLineNumber = lineNumber; // extend previous range
} else {
changedRanges.add(new Range(lineNumber)); // insert new range
}
modelLine.isInvalid = false;

// check if the next line requires re-tokenization too
lineIndex++;
final var nextModelLine = modelLines.getOrNull(lineIndex);
if (nextModelLine == null) {
if (DEBUG_LOGGING)
logDebug("(%d) >> DONE - next line %d does not exist", startLineIndex, lineIndex);
break revalidateTokens_Loop;
}
if (!nextModelLine.isInvalid && nextModelLine.startState.equals(r.endState)) {
// has matching start state == is up to date
if (DEBUG_LOGGING)
logDebug("(%d) >> DONE - tokens of next line %d are up-to-date", startLineIndex, lineIndex);
break revalidateTokens_Loop;
}

/*
* check if the next line now requires a token update too
*/
final var nextModelLine = modelLines.getOrNull(lineIndex + 1);
if (nextModelLine == null) {
return UpdateTokensOfLineResult.DONE; // next line does not exist
// next line is out of date
nextModelLine.startState = r.endState;
if (System.currentTimeMillis() - startTime >= MAX_LOOP_TIME) {
if (DEBUG_LOGGING)
logDebug("(%d) >> DONE - no more time left", startLineIndex);
invalidateLine(lineIndex); // mark the next line as invalid and add it to the end of the queue
break revalidateTokens_Loop;
}
}
}

if (!nextModelLine.isInvalid && nextModelLine.startState.equals(r.endState)) {
return UpdateTokensOfLineResult.DONE; // next line is valid and has matching start state
if (!changedRanges.isEmpty()) {
emit(new ModelTokensChangedEvent(changedRanges, TMModel.this));
}

// next line is out of date
nextModelLine.startState = r.endState;
return UpdateTokensOfLineResult.NEXT_LINE_IS_OUTDATED;
}
}

Expand All @@ -224,11 +219,12 @@ public BackgroundTokenizationState getBackgroundTokenizationState() {
}

@Override
public void setGrammar(final IGrammar grammar) {
public synchronized void setGrammar(final IGrammar grammar) {
if (!Objects.equals(grammar, this.grammar)) {
this.grammar = grammar;
final var tokenizer = this.tokenizer = new TMTokenization(grammar);
modelLines.get(0).startState = tokenizer.getInitialState();
invalidateLine(0);
startTokenizerThread();
}
}
Expand Down Expand Up @@ -278,6 +274,8 @@ private synchronized void stopTokenizerThread() {
}

private void emit(final ModelTokensChangedEvent e) {
if (DEBUG_LOGGING)
logDebug("(%s)", e);
for (final IModelTokensChangedListener listener : listeners) {
listener.modelTokensChanged(e);
}
Expand All @@ -295,6 +293,8 @@ public int getNumberOfLines() {

/** Marks the given line as out-of-date resulting in async re-parsing */
void invalidateLine(final int lineIndex) {
if (DEBUG_LOGGING)
logDebug("(%d)", lineIndex);
final var modelLine = modelLines.getOrNull(lineIndex);
if (modelLine != null) {
modelLine.isInvalid = true;
Expand Down
15 changes: 15 additions & 0 deletions org.eclipse.tm4e.core/src/test/resources/jul.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#to activate, launch with -Djava.util.logging.config.file=${workspace_loc}\tm4e\org.eclipse.tm4e.core\src\test\resources\jul.properties

# configure log output targets
handlers=java.util.logging.ConsoleHandler
java.util.logging.ConsoleHandler.level=FINE
java.util.logging.ConsoleHandler.formatter=java.util.logging.SimpleFormatter

# configure log format
# %1=date %2=source %3=logger %4=level %5=message, %6=thrown
java.util.logging.SimpleFormatter.format=%1$tT %4$s [%3$s] %5$s %n


# configure log levels
.level=INFO
org.eclipse.tm4e.core.model.TMModel.level=FINE

0 comments on commit 83d01a7

Please sign in to comment.