Merge branch 'feature/2023-11-22/enable-mlc-server-logprobs' into vc/update_logprob
vvchernov committed Dec 28, 2023
2 parents 214e610 + 9b053e8 commit 43b4625
Showing 3 changed files with 26 additions and 2 deletions.
1 change: 0 additions & 1 deletion serve/mlc_serve/api/handler.py
@@ -261,7 +261,6 @@ async def collect_result_stream(
             finish_reason=finish_reason,
             logprobs=Logprobs(content=content),
         )
-        choice.logprobs.content = content
         choices.append(choice)
 
     usage = UsageInfo(
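For context, the line removed in this hunk was redundant: the choice is constructed with logprobs=Logprobs(content=content), so assigning choice.logprobs.content = content afterwards was effectively a no-op. A minimal sketch of the pattern, using hypothetical dataclass stand-ins rather than the repository's actual response models:

from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Logprobs:
    # Hypothetical stand-in for the API's Logprobs response model.
    content: List[Dict[str, float]] = field(default_factory=list)


@dataclass
class Choice:
    # Hypothetical stand-in for the chat completion choice model.
    finish_reason: str
    logprobs: Logprobs


content = [{"hello": -0.5}]
choice = Choice(finish_reason="stop", logprobs=Logprobs(content=content))
# The constructor already attached content, so re-assigning it added nothing.
assert choice.logprobs.content is content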
26 changes: 25 additions & 1 deletion serve/mlc_serve/engine/staging_engine.py
@@ -26,7 +26,7 @@
     update_sequence,
     logprob_detokenize
 )
-from .model_module import ModelModule, TokenizerModule
+from .model_module import ModelModule, TokenizerModule, Tokenizer
 from .staging_engine_worker import (
     AddRequestsCommand,
     CancelRequestCommand,
@@ -40,6 +40,30 @@
 LOG = structlog.stdlib.get_logger(__name__)
 
 
+def logprob_detokenize(tokenizer: Tokenizer, logprob_info: Tuple[Tuple, List[Tuple]]) -> Tuple[Tuple, List[Tuple]]:
+    """Detokenize logprob information"""
+    if logprob_info is None:
+        return None
+    (res, res_logprob), top_tokens = logprob_info
+    top_tokens = list(top_tokens)
+    count = {}
+    logprob_dict = {}
+    # dedup duplicates
+    # Todo: Make sure decode can generate different tokens
+    for top_token, _ in top_tokens:
+        detokenized = tokenizer.decode(top_token)
+        if detokenized in count:
+            count[detokenized] += 1
+        else:
+            count[detokenized] = 1
+    for top_token, top_logprob in top_tokens:
+        detokenized = tokenizer.decode(top_token)
+        if count[detokenized] == 1:
+            logprob_dict[detokenized] = float(top_logprob)
+        else:
+            logprob_dict[f"{detokenized}_{top_token}"] = float(top_logprob)
+    return (str(tokenizer.decode(res)), res_logprob), logprob_dict
+
 class StagingInferenceEngine(ScopedInferenceEngine):
     """
     An implementation of InferenceEngine that offloads the text generation loop to another worker process,
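For illustration, a standalone usage sketch of the helper added above, which converts token-id logprob info into human-readable strings and suffixes the token id whenever two different ids decode to the same text. This sketch is not part of the commit; it assumes the module is importable as mlc_serve.engine.staging_engine (mirroring the file path in this diff) and uses a hypothetical StubTokenizer that only implements decode():

from mlc_serve.engine.staging_engine import logprob_detokenize  # import path assumed from the file path above


class StubTokenizer:
    # Hypothetical stand-in for the engine's Tokenizer; only decode() is needed here.
    def __init__(self, vocab):
        self.vocab = vocab

    def decode(self, token_id):
        return self.vocab[token_id]


tokenizer = StubTokenizer({1: "hello", 2: "hello", 3: "world"})

# logprob_info pairs the sampled (token, logprob) with the list of top (token, logprob) candidates.
logprob_info = ((3, -0.1), [(1, -0.5), (2, -0.7), (3, -0.1)])

(text, logprob), top = logprob_detokenize(tokenizer, logprob_info)
# Ids 1 and 2 both decode to "hello", so they are disambiguated by id:
# text == "world", logprob == -0.1
# top == {"hello_1": -0.5, "hello_2": -0.7, "world": -0.1}
print(text, logprob, top)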
1 change: 1 addition & 0 deletions serve/mlc_serve/engine/staging_engine_worker.py
@@ -19,6 +19,7 @@
     SequenceId,
     GenerationSequence,
 )
+
 from .metrics import PrometheusMetrics
 from .metrics_labels import *
 from .model_module import (
