Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set torchserve version to 0.7.1 for compatibility #1815

Open
wants to merge 9 commits into
base: dev
Choose a base branch
from
68 changes: 68 additions & 0 deletions configs/clip/clip_vit-base-p16_zeroshot-cls_cifar100.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
_base_ = '../_base_/default_runtime.py'

# data settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255],
std=[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255],
to_rgb=False,
)

test_pipeline = [
dict(type='Resize', scale=(224, 224), interpolation='bicubic'),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id', 'scale_factor'],
),
]

train_dataloader = None
test_dataloader = dict(
batch_size=32,
num_workers=8,
dataset=dict(
type='CIFAR100',
data_root='data/cifar100',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='Accuracy', topk=(1, 5))

# schedule settings
train_cfg = None
val_cfg = None
test_cfg = dict()

# model settings
model = dict(
type='CLIPZeroShot',
vision_backbone=dict(
type='VisionTransformer',
arch='base',
img_size=224,
patch_size=16,
drop_rate=0.,
layer_cfgs=dict(act_cfg=dict(type='QuickGELU')),
pre_norm=True,
),
projection=dict(type='CLIPProjection', in_channels=768, out_channels=512),
text_backbone=dict(
type='CLIPTransformer',
width=512,
layers=12,
heads=8,
attn_mask=True,
),
tokenizer=dict(
type='AutoTokenizer',
name_or_path='openai/clip-vit-base-patch16',
use_fast=False),
vocab_size=49408,
transformer_width=512,
proj_dim=512,
text_prototype='cifar100',
text_prompt='openai_cifar100',
context_length=77,
)
69 changes: 69 additions & 0 deletions configs/clip/clip_vit-base-p16_zeroshot-cls_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
_base_ = '../_base_/default_runtime.py'

# data settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255],
std=[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255],
to_rgb=True,
)

test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(224, 224), interpolation='bicubic'),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id', 'scale_factor'],
),
]

train_dataloader = None
test_dataloader = dict(
batch_size=32,
num_workers=8,
dataset=dict(
type='ImageNet',
data_root='data/imagenet',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='Accuracy', topk=(1, 5))

# schedule settings
train_cfg = None
val_cfg = None
test_cfg = dict()

# model settings
model = dict(
type='CLIPZeroShot',
vision_backbone=dict(
type='VisionTransformer',
arch='base',
img_size=224,
patch_size=16,
drop_rate=0.,
layer_cfgs=dict(act_cfg=dict(type='QuickGELU')),
pre_norm=True,
),
projection=dict(type='CLIPProjection', in_channels=768, out_channels=512),
text_backbone=dict(
type='CLIPTransformer',
width=512,
layers=12,
heads=8,
attn_mask=True,
),
tokenizer=dict(
type='AutoTokenizer',
name_or_path='openai/clip-vit-base-patch16',
use_fast=False),
vocab_size=49408,
transformer_width=512,
proj_dim=512,
text_prototype='imagenet',
text_prompt='openai_imagenet_sub', # openai_imagenet, openai_imagenet_sub
context_length=77,
)
68 changes: 68 additions & 0 deletions configs/clip/clip_vit-large-p14_zeroshot-cls_cifar100.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
_base_ = '../_base_/default_runtime.py'

# data settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255],
std=[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255],
to_rgb=False,
)

test_pipeline = [
dict(type='Resize', scale=(224, 224), interpolation='bicubic'),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id', 'scale_factor'],
),
]

train_dataloader = None
test_dataloader = dict(
batch_size=32,
num_workers=8,
dataset=dict(
type='CIFAR100',
data_root='data/cifar100',
split='test',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='Accuracy', topk=(1, 5))

# schedule settings
train_cfg = None
val_cfg = None
test_cfg = dict()

# model settings
model = dict(
type='CLIPZeroShot',
vision_backbone=dict(
type='VisionTransformer',
arch='large',
img_size=224,
patch_size=14,
drop_rate=0.,
layer_cfgs=dict(act_cfg=dict(type='QuickGELU')),
pre_norm=True,
),
projection=dict(type='CLIPProjection', in_channels=1024, out_channels=768),
text_backbone=dict(
type='CLIPTransformer',
width=768,
layers=12,
heads=12,
attn_mask=True,
),
tokenizer=dict(
type='AutoTokenizer',
name_or_path='openai/clip-vit-large-patch14',
use_fast=False),
vocab_size=49408,
transformer_width=768,
proj_dim=768,
text_prototype='cifar100',
text_prompt='openai_cifar100',
context_length=77,
)
69 changes: 69 additions & 0 deletions configs/clip/clip_vit-large-p14_zeroshot-cls_in1k.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
_base_ = '../_base_/default_runtime.py'

# data settings
data_preprocessor = dict(
type='MultiModalDataPreprocessor',
mean=[0.48145466 * 255, 0.4578275 * 255, 0.40821073 * 255],
std=[0.26862954 * 255, 0.26130258 * 255, 0.27577711 * 255],
to_rgb=True,
)

test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(224, 224), interpolation='bicubic'),
dict(
type='PackInputs',
algorithm_keys=['text'],
meta_keys=['image_id', 'scale_factor'],
),
]

train_dataloader = None
test_dataloader = dict(
batch_size=32,
num_workers=8,
dataset=dict(
type='ImageNet',
data_root='data/imagenet',
split='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
)
test_evaluator = dict(type='Accuracy', topk=(1, 5))

# schedule settings
train_cfg = None
val_cfg = None
test_cfg = dict()

# model settings
model = dict(
type='CLIPZeroShot',
vision_backbone=dict(
type='VisionTransformer',
arch='large',
img_size=224,
patch_size=14,
drop_rate=0.,
layer_cfgs=dict(act_cfg=dict(type='QuickGELU')),
pre_norm=True,
),
projection=dict(type='CLIPProjection', in_channels=1024, out_channels=768),
text_backbone=dict(
type='CLIPTransformer',
width=768,
layers=12,
heads=12,
attn_mask=True,
),
tokenizer=dict(
type='AutoTokenizer',
name_or_path='openai/clip-vit-large-patch14',
use_fast=False),
vocab_size=49408,
transformer_width=768,
proj_dim=768,
text_prototype='imagenet',
text_prompt='openai_imagenet_sub', # openai_imagenet, openai_imagenet_sub
context_length=77,
)
5 changes: 3 additions & 2 deletions docker/serve/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
ARG PYTORCH="1.12.1"
ARG CUDA="11.3"
ARG CUDNN="8"
ARG TORCHSERVE="0.7.1"
FROM pytorch/torchserve:latest-gpu

ARG MMPRE="1.0.2"
Expand All @@ -11,8 +12,8 @@ ENV HOME="/home/model-server"
ENV PATH="/opt/conda/bin:$HOME/.local/bin:$PATH"
RUN export FORCE_CUDA=1

# TORCHSEVER
RUN pip install torchserve torch-model-archiver
# TORCHSERVE
RUN pip install torchserve==${TORCHSERVE} torch-model-archiver==${TORCHSERVE}
RUN pip install nvgpu

# OPEN-MMLAB
Expand Down
1 change: 1 addition & 0 deletions mmpretrain/apis/image_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def build_dataloader(dataset):
# A config of dataset
from mmpretrain.registry import DATASETS
test_pipeline = [dict(type='LoadImageFromFile'), self.pipeline]
prototype.setdefault('pipeline', test_pipeline)
dataset = DATASETS.build(prototype)
dataloader = build_dataloader(dataset)
elif isinstance(prototype, DataLoader):
Expand Down
52 changes: 52 additions & 0 deletions mmpretrain/configs/_base_/datasets/cifar10_bs16.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This is a BETA new format config file, and the usage may change recently.
from mmengine.dataset import DefaultSampler

from mmpretrain.datasets import CIFAR10, PackInputs, RandomCrop, RandomFlip
from mmpretrain.evaluation import Accuracy

# dataset settings
dataset_type = CIFAR10
data_preprocessor = dict(
num_classes=10,
# RGB format normalization parameters
mean=[125.307, 122.961, 113.8575],
std=[51.5865, 50.847, 51.255],
# loaded images are already RGB format
to_rgb=False)

train_pipeline = [
dict(type=RandomCrop, crop_size=32, padding=4),
dict(type=RandomFlip, prob=0.5, direction='horizontal'),
dict(type=PackInputs),
]

test_pipeline = [
dict(type=PackInputs),
]

train_dataloader = dict(
batch_size=16,
num_workers=2,
dataset=dict(
type=dataset_type,
data_root='data/cifar10',
split='train',
pipeline=train_pipeline),
sampler=dict(type=DefaultSampler, shuffle=True),
)

val_dataloader = dict(
batch_size=16,
num_workers=2,
dataset=dict(
type=dataset_type,
data_root='data/cifar10/',
split='test',
pipeline=test_pipeline),
sampler=dict(type=DefaultSampler, shuffle=False),
)
val_evaluator = dict(type=Accuracy, topk=(1, ))

test_dataloader = val_dataloader
test_evaluator = val_evaluator
Loading