From 82c2fd0527cd2c6d7f6c0394724bf1848dec5552 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Mon, 23 Sep 2024 06:52:43 -0700
Subject: [PATCH] update

---
 python/sglang/srt/model_executor/model_runner.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 049a43840e..5096257be6 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -467,7 +467,6 @@ def init_cuda_graphs(self):
         logger.info("Capture cuda graph begin. This can take up to several minutes.")
         self.cuda_graph_runner = CudaGraphRunner(self)
 
-    @torch.inference_mode()
     def forward_decode(self, batch: ScheduleBatch):
         if self.server_args.lora_paths is not None:
             self.lora_manager.prepare_lora_batch(batch)
@@ -481,7 +480,6 @@ def forward_decode(self, batch: ScheduleBatch):
             batch.input_ids, input_metadata.positions, input_metadata
         )
 
-    @torch.inference_mode()
     def forward_extend(self, batch: ScheduleBatch):
         input_metadata = InputMetadata.from_schedule_batch(self, batch)
         if self.server_args.lora_paths is not None:
@@ -500,7 +498,6 @@ def forward_extend(self, batch: ScheduleBatch):
                 get_embedding=True,
             )
 
-    @torch.inference_mode()
     def forward_extend_multi_modal(self, batch: ScheduleBatch):
         input_metadata = InputMetadata.from_schedule_batch(self, batch)
         return self.model.forward(
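
Note (not part of the patch): the change removes the @torch.inference_mode() decorators from the three forward entry points; the commit message does not state the motivation. For context, torch.inference_mode() is a standard PyTorch API that works both as a decorator and as a context manager, so autograd can still be disabled at the call site instead. Below is a minimal sketch of the context-manager form; run_decode_step is a hypothetical caller and is not part of the repository.

    import torch

    def run_decode_step(model_runner, batch):
        # Hypothetical helper: wraps the now-undecorated forward_decode call in
        # torch.inference_mode(), which disables gradient tracking for the block.
        with torch.inference_mode():
            return model_runner.forward_decode(batch)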