Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
merrymercy committed Sep 23, 2024
1 parent: 0c5d609; commit: 82c2fd0
Showing 1 changed file with 0 additions and 3 deletions.
3 changes: 0 additions & 3 deletions python/sglang/srt/model_executor/model_runner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -467,7 +467,6 @@ def init_cuda_graphs(self):
logger.info("Capture cuda graph begin. This can take up to several minutes.")
self.cuda_graph_runner = CudaGraphRunner(self)

@torch.inference_mode()
def forward_decode(self, batch: ScheduleBatch):
if self.server_args.lora_paths is not None:
self.lora_manager.prepare_lora_batch(batch)
Expand All @@ -481,7 +480,6 @@ def forward_decode(self, batch: ScheduleBatch):
batch.input_ids, input_metadata.positions, input_metadata
)

@torch.inference_mode()
def forward_extend(self, batch: ScheduleBatch):
input_metadata = InputMetadata.from_schedule_batch(self, batch)
if self.server_args.lora_paths is not None:
Expand All @@ -500,7 +498,6 @@ def forward_extend(self, batch: ScheduleBatch):
get_embedding=True,
)

@torch.inference_mode()
def forward_extend_multi_modal(self, batch: ScheduleBatch):
input_metadata = InputMetadata.from_schedule_batch(self, batch)
return self.model.forward(
Expand Down

0 comments on commit 82c2fd0

Please sign in to comment.