eval ready
kwea123 committed Jan 23, 2021
1 parent 465202a commit f72c67a
Showing 4 changed files with 53 additions and 43 deletions.
20 changes: 14 additions & 6 deletions README.md
@@ -69,7 +69,7 @@ Download the pretrained models and training logs in [release](https://github.com

# :mag_right: Testing

See [test.ipynb](test.ipynb) for a simple view synthesis and depth prediction on 1 image.
Example: [test_nerf-u.ipynb](test_nerf-u.ipynb) shows how NeRF-U successfully decomposes the scene into static and transient components.

Use [eval.py](eval.py) to create the whole sequence of moving views.
E.g.
@@ -79,14 +79,22 @@ python eval.py \
--dataset_name blender --scene_name lego \
--img_wh 400 400 --N_importance 64 --ckpt_path $CKPT_PATH
```
**IMPORTANT**: Don't forget to add `--spheric_poses` if the model was trained with the `--spheric_poses` setting!

It will create the folder `results/{dataset_name}/{scene_name}`, run inference on all the test data, and finally create a gif out of the predictions.
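For reference, a minimal sketch of the final gif step, assuming frames were saved as `000.png`, `001.png`, ... (the paths, frame count, and fps here are illustrative, not the script's exact values):

```python
import imageio

# Collect the frames written by eval.py and stitch them into a gif.
frames = [imageio.imread(f'results/blender/lego/{i:03d}.png')
          for i in range(200)]
imageio.mimsave('results/blender/lego/lego.gif', frames, fps=30)
```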


Example of the lego scene using the pretrained **NeRF-U** model under the **occluder** condition (PSNR=28.60 vs. 23.47 in the paper):
![nerf-u](https://user-images.githubusercontent.com/11364490/105578186-a9933400-5dc1-11eb-8865-e276b581d8fd.gif)

# :warning: Notes on differences with the original repo

* The learning rate decay in the original repo is **by step** (it decreases every training step); here I use learning rate decay **by epoch**, so the rate changes only at the end of each epoch (see the scheduler sketch after this list).
* The validation image for the LLFF dataset is chosen as the most centered image here, whereas the original repo chooses every 8th image.
* The rendering spiral path is slightly different from the original repo (I use approximate values to simplify the code).
* Network structure ([nerf.py](models/nerf.py)):
  * My base MLP uses 8 layers of 256 units like the original NeRF, while NeRF-W uses **512** units each.
  * My static head uses 1 layer like the original NeRF, while NeRF-W uses **4** layers.
  * I use **softplus** activation for sigma (reason explained [here](https://github.com/bmild/nerf/issues/29#issuecomment-765335765)) while NeRF-W uses **relu** (see the sketch below).
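Two illustrative sketches for the points above. First, learning rate decay **by epoch**: a toy loop (hypothetical model and schedule, not the repo's actual training code) where the rate changes only when `scheduler.step()` runs at the epoch boundary:

```python
import torch

model = torch.nn.Linear(3, 1)  # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

for epoch in range(20):
    for _ in range(100):  # 100 training steps per epoch
        optimizer.zero_grad()
        loss = model(torch.randn(8, 3)).pow(2).mean()
        loss.backward()
        optimizer.step()  # lr stays constant within the epoch
    scheduler.step()      # decay is applied only here
```

Second, the sigma activation: a hypothetical density head (not the exact `models/nerf.py` code) contrasting the two choices. Both keep sigma non-negative, but softplus also passes gradients for negative pre-activations where relu is flat:

```python
import torch

class SigmaHead(torch.nn.Module):
    def __init__(self, in_channels: int = 256, use_softplus: bool = True):
        super().__init__()
        self.linear = torch.nn.Linear(in_channels, 1)
        # softplus: smooth and positive with nonzero gradient everywhere;
        # relu: hard cutoff with zero gradient below 0
        self.act = torch.nn.Softplus() if use_softplus else torch.nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.act(self.linear(x))  # non-negative volume density
```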

* Training hyperparameters
  * I find that a larger `beta_min` achieves better results, so my default `beta_min` is `0.1` instead of the paper's `0.03`.
  * I empirically add 3 to `beta_loss` (equation 13) to keep it positive (see the sketch below).
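A minimal sketch of that shifted term, assuming `beta` is the per-ray variance predicted by the transient head (my reading of equation 13, not the repo's exact loss class):

```python
import torch

def beta_loss(beta: torch.Tensor) -> torch.Tensor:
    # log(beta) is negative for beta < 1 (e.g. beta_min = 0.1),
    # so the +3 offset keeps the logged value positive.
    return 3 + torch.log(beta).mean()

print(beta_loss(torch.full((1024,), 0.1)))  # ~0.70 instead of ~-2.30
```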

* Evaluation
  * The evaluation metric is computed on the **test** set, while NeRF evaluates on val and test combined.
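For reference, a PSNR sketch following the usual definition for images scaled to [0, 1] (the repo's exact reduction may differ):

```python
import torch

def psnr(pred: torch.Tensor, gt: torch.Tensor) -> torch.Tensor:
    # Peak signal-to-noise ratio in dB; higher is better.
    mse = torch.mean((pred - gt) ** 2)
    return -10 * torch.log10(mse)
```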
64 changes: 33 additions & 31 deletions eval.py
@@ -23,17 +23,16 @@ def get_opts():
default='/home/ubuntu/data/nerf_example_data/nerf_synthetic/lego',
help='root directory of dataset')
parser.add_argument('--dataset_name', type=str, default='blender',
choices=['blender', 'llff'],
choices=['blender'],
help='which dataset to validate')
parser.add_argument('--scene_name', type=str, default='test',
help='scene name, used as output folder name')
parser.add_argument('--split', type=str, default='test',
help='test or test_train')
parser.add_argument('--split', type=str, default='val',
choices=['val', 'test', 'test_train'])
parser.add_argument('--img_wh', nargs="+", type=int, default=[800, 800],
help='resolution (img_w, img_h) of the image')
parser.add_argument('--spheric_poses', default=False, action="store_true",
help='whether images are taken in spheric poses (for llff)')

# original NeRF parameters
parser.add_argument('--N_emb_xyz', type=int, default=10,
help='number of xyz embedding frequencies')
parser.add_argument('--N_emb_dir', type=int, default=4,
@@ -45,9 +44,19 @@ def get_opts():
parser.add_argument('--use_disp', default=False, action="store_true",
help='use disparity depth sampling')

# NeRF-W parameters
parser.add_argument('--N_vocab', type=int, default=100,
help='''number of vocabulary (number of images)
in the dataset for nn.Embedding''')
parser.add_argument('--encode_a', default=False, action="store_true",
help='whether to encode appearance (NeRF-A)')
parser.add_argument('--N_a', type=int, default=48,
help='number of embeddings for appearance')
parser.add_argument('--encode_t', default=False, action="store_true",
help='whether to encode transient object (NeRF-U)')
parser.add_argument('--N_tau', type=int, default=16,
help='number of embeddings for transient objects')
parser.add_argument('--beta_min', type=float, default=0.03,
parser.add_argument('--beta_min', type=float, default=0.1,
help='minimum color variance for each ray')

parser.add_argument('--chunk', type=int, default=32*1024*4,
@@ -56,12 +65,6 @@ def get_opts():
parser.add_argument('--ckpt_path', type=str, required=True,
help='pretrained checkpoint path to load')

parser.add_argument('--save_depth', default=False, action="store_true",
help='whether to save depth prediction')
parser.add_argument('--depth_format', type=str, default='pfm',
choices=['pfm', 'bytes'],
help='which format to save')

return parser.parse_args()


@@ -103,24 +106,32 @@ def batched_inference(models, embeddings,
kwargs = {'root_dir': args.root_dir,
'split': args.split,
'img_wh': tuple(args.img_wh)}
if args.dataset_name == 'llff':
kwargs['spheric_poses'] = args.spheric_poses
dataset = dataset_dict[args.dataset_name](**kwargs)

embedding_t = torch.nn.Embedding(200, args.N_tau)
embedding_xyz = PosEmbedding(args.N_emb_xyz-1, args.N_emb_xyz)
embedding_dir = PosEmbedding(args.N_emb_dir-1, args.N_emb_dir)
nerf_coarse = NeRF('coarse')
nerf_fine = NeRF('fine', beta_min=args.beta_min)
load_ckpt(embedding_t, args.ckpt_path, model_name='embedding_t')
embeddings = {'xyz': embedding_xyz, 'dir': embedding_dir}
if args.encode_a:
embedding_a = torch.nn.Embedding(args.N_vocab, args.N_a).cuda()
load_ckpt(embedding_a, args.ckpt_path, model_name='embedding_a')
embeddings['a'] = embedding_a
if args.encode_t:
embedding_t = torch.nn.Embedding(args.N_vocab, args.N_tau).cuda()
load_ckpt(embedding_t, args.ckpt_path, model_name='embedding_t')
embeddings['t'] = embedding_t

nerf_coarse = NeRF('coarse').cuda()
nerf_fine = NeRF('fine',
encode_appearance=args.encode_a,
in_channels_a=args.N_a,
encode_transient=args.encode_t,
in_channels_t=args.N_tau,
beta_min=args.beta_min).cuda()

load_ckpt(nerf_coarse, args.ckpt_path, model_name='nerf_coarse')
load_ckpt(nerf_fine, args.ckpt_path, model_name='nerf_fine')
embedding_t.cuda()
nerf_coarse.cuda()
nerf_fine.cuda()

models = {'coarse': nerf_coarse, 'fine': nerf_fine}
embeddings = {'xyz': embedding_xyz, 'dir': embedding_dir, 't': embedding_t}

imgs, psnrs = [], []
dir_name = f'results/{args.dataset_name}/{args.scene_name}'
@@ -137,15 +148,6 @@

img_pred = results['rgb_fine'].view(h, w, 3).cpu().numpy()

if args.save_depth:
depth_pred = results['depth_fine'].view(h, w).cpu().numpy()
depth_pred = np.nan_to_num(depth_pred)
if args.depth_format == 'pfm':
save_pfm(os.path.join(dir_name, f'depth_{i:03d}.pfm'), depth_pred)
else:
with open(f'depth_{i:03d}', 'wb') as f:
f.write(depth_pred.tobytes())

img_pred_ = (img_pred*255).astype(np.uint8)
imgs += [img_pred_]
imageio.imwrite(os.path.join(dir_name, f'{i:03d}.png'), img_pred_)
2 changes: 1 addition & 1 deletion opt.py
@@ -46,7 +46,7 @@ def get_opts():
help='whether to encode transient object (NeRF-U)')
parser.add_argument('--N_tau', type=int, default=16,
help='number of embeddings for transient objects')
parser.add_argument('--beta_min', type=float, default=0.03,
parser.add_argument('--beta_min', type=float, default=0.1,
help='minimum color variance for each ray')

parser.add_argument('--batch_size', type=int, default=1024,
10 changes: 5 additions & 5 deletions test_nerf-u.ipynb
@@ -38,6 +38,11 @@
"N_tau = 16\n",
"beta_min = 0.1\n",
"ckpt_path = 'ckpts/lego_nerfw_occ2/epoch=19.ckpt'\n",
"\n",
"N_samples = 64\n",
"N_importance = 64\n",
"use_disp = False\n",
"chunk = 1024*32\n",
"#############################\n",
"\n",
"embedding_xyz = PosEmbedding(9, 10)\n",
@@ -73,11 +78,6 @@
"metadata": {},
"outputs": [],
"source": [
"N_samples = 64\n",
"N_importance = 64\n",
"use_disp = False\n",
"chunk = 1024*32\n",
"\n",
"@torch.no_grad()\n",
"def f(rays, ts):\n",
" \"\"\"Do batched inference on rays using chunk.\"\"\"\n",
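The notebook's `f` uses the same chunked-inference pattern as eval.py; here is a standalone sketch of that pattern (`run_chunked` is a hypothetical helper, and `f` is assumed to return a dict of tensors like the repo's render results):

```python
import torch

@torch.no_grad()
def run_chunked(f, rays: torch.Tensor, ts: torch.Tensor, chunk: int = 1024 * 32):
    # Run f on at most `chunk` rays at a time and concatenate the
    # per-key outputs, bounding peak GPU memory.
    out = {}
    for i in range(0, rays.shape[0], chunk):
        for k, v in f(rays[i:i + chunk], ts[i:i + chunk]).items():
            out.setdefault(k, []).append(v)
    return {k: torch.cat(v, 0) for k, v in out.items()}
```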
