thtrieu · chadrick-kwag · Apr 3, 2018 · Apr 3, 2018 · Apr 3, 2018
diff --git a/.gitignore b/.gitignore
@@ -44,3 +44,6 @@ ckpt/*
 
 #pytest cache
 .cache/
+
+#dataset dir
+data
diff --git a/darkflow/cython_utils/.gitignore b/darkflow/cython_utils/.gitignore
@@ -0,0 +1,2 @@
+*.so
+
diff --git a/darkflow/defaults.py b/darkflow/defaults.py
@@ -35,6 +35,7 @@ def setDefaults(self):
         self.define('saveVideo', False, 'Records video from input video or camera')
         self.define('pbLoad', '', 'path to .pb protobuf file (metaLoad must also be specified)')
         self.define('metaLoad', '', 'path to .meta file generated during --savepb that corresponds to .pb file')
+        self.define('annotationformat','xml','format of annotation. xml or json available. default is xml(pascal_voc style)')
 
     def define(self, argName, default, description):
         self[argName] = default

diff --git a/darkflow/net/yolo/data.py b/darkflow/net/yolo/data.py
@@ -1,4 +1,5 @@
 from ...utils.pascal_voc_clean_xml import pascal_voc_clean_xml
+from ...utils.annotation_json_parser import annotation_json_parser
 from numpy.random import permutation as perm
 from .predict import preprocess
 # from .misc import show
@@ -15,7 +16,14 @@ def parse(self, exclusive = False):
         msg = 'Annotation directory not found {} .'
         exit('Error: {}'.format(msg.format(ann)))
     print('\n{} parsing {}'.format(meta['model'], ann))
-    dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive)
+
+    dumps = None
+
+    if self.FLAGS.annotationformat == 'xml':
+        dumps = pascal_voc_clean_xml(ann, meta['labels'], exclusive)
+    elif self.FLAGS.annotationformat == 'json':
+        dumps = annotation_json_parser(ann, meta['labels'], exclusive)
+
     return dumps
 
 

diff --git a/darkflow/utils/annotation_json_parser.py b/darkflow/utils/annotation_json_parser.py
@@ -0,0 +1,117 @@
+"""
+json format parser
+author: [email protected]
+
+most of the code is copied from pascal_voc_clean_xml.py
+
+the format of the json file should be like the following example:
+
+{"imgfile": "0313.png", "w": 640, "h": 480, "objects": [{"rect": {"y1": 4, "y2": 144, "x1": 385, "x2": 587}, "name": "face"}]}
+
+the json file should be in a single line.
+it is convenient to use the python's json module when creating these files.
+
+also, this parser checks the ordering of x1/x2 and y1/y2 and swaps them when needed.
+if out-of-order values were mapped to xn,yn,xx,yx (min/max), it would cause an error during training.
+
+"""
+
+import json
+import os
+import sys
+import glob
+
+
+
+def _pp(l):
+    """Pretty-print a mapping, one 'key: value' pair per line."""
+    for key in l:
+        print('{}: {}'.format(key, l[key]))
+
+
+def _read_annotation(path, pick):
+    """Parse one annotation file into a single dump entry.
+
+    Only the first line of the file is read; NUL characters are removed
+    from it before it is decoded as JSON.
+
+    Args:
+        path: path of the json annotation file.
+        pick: collection of label names to keep; objects whose name is
+            not in it are reported and dropped.
+
+    Returns:
+        [imgfile, [w, h, boxes]] where boxes is a list of
+        [name, xn, yn, xx, yx] entries with xn <= xx and yn <= yx.
+
+    Raises:
+        ValueError: if the first line is not valid JSON.
+        KeyError: if a required key is missing.
+    """
+    # the file contains zero padding and the actual json is in the first line
+    with open(path) as in_file:
+        firstline = in_file.readline().replace('\0', '')
+    root = json.loads(firstline)
+
+    imgfile = str(root['imgfile'])
+    w = root['w']
+    h = root['h']
+
+    boxes = list()
+    for obj in root['objects']:
+        name = str(obj['name'])
+        if name not in pick:
+            print("{} not in pick".format(name))
+            continue
+
+        rect = obj['rect']
+        # x1/x2 and y1/y2 may come in either order; the consumer expects
+        # xn/yn to be the minimum and xx/yx the maximum of each pair
+        xn, xx = min(rect['x1'], rect['x2']), max(rect['x1'], rect['x2'])
+        yn, yx = min(rect['y1'], rect['y2']), max(rect['y1'], rect['y2'])
+        boxes.append([name, xn, yn, xx, yx])
+
+    return [imgfile, [w, h, boxes]]
+
+
+def annotation_json_parser(ANN, pick, exclusive = False):
+    """Parse every '*.json' annotation file found directly inside ANN.
+
+    Args:
+        ANN: annotation directory (FLAGS.annotation).
+        pick: label names to keep (meta['labels']).
+        exclusive: accepted so the signature matches the xml parser;
+            it is not used by this parser.
+
+    Returns:
+        A list with one [imgfile, [w, h, boxes]] entry per annotation
+        file, in sorted file-name order; each box is
+        [name, xn, yn, xx, yx].
+
+    The working directory is switched to ANN while parsing and is always
+    restored afterwards, even when a file fails to parse.
+    """
+    dumps = list()
+    cur_dir = os.getcwd()
+    os.chdir(ANN)
+    try:
+        # match on the extension only; sorted for a reproducible order
+        annotations = sorted(glob.glob('*.json'))
+        size = len(annotations)
+
+        for i, file in enumerate(annotations):
+            # progress bar
+            sys.stdout.write('\r')
+            percentage = 1. * (i+1) / size
+            progress = int(percentage * 20)
+            bar_arg = [progress*'=', ' '*(19-progress), percentage*100]
+            bar_arg += [file]
+            sys.stdout.write('[{}>{}]{:.0f}%  {}'.format(*bar_arg))
+            sys.stdout.flush()
+
+            # actual parsing
+            print("opening file {}".format(file))
+            dumps.append(_read_annotation(file, pick))
+    finally:
+        os.chdir(cur_dir)
+
+    # gather all stats: number of kept boxes per label
+    stat = dict()
+    for dump in dumps:
+        for box in dump[1][2]:
+            stat[box[0]] = stat.get(box[0], 0) + 1
+
+    print('\nStatistics:')
+    _pp(stat)
+    print('Dataset size: {}'.format(len(dumps)))
+
+    return dumps
diff --git a/docs/annotation_json_format.md b/docs/annotation_json_format.md
@@ -0,0 +1,33 @@
+# using json as an annotation format
+
+Previously, darkflow only supported the pascal-voc xml annotation format. JSON is another convenient annotation format, so a parser for json annotations has been added.
+
+## how to use json parser during training
+
+An `--annotationformat` option has been added to the `flow` command. If it is not specified, or the user gives `xml` as its value, the pascal-voc xml parser is used.
+
+On the other hand, if the user gives `json` as a value, then it will utilize the json parser.
+
+## json format
+
+The json parser parses the json files inside the specified annotation dir. Each file must hold one JSON object on its first line (only the first line is read) with the following keys:
+
+- `imgfile`: the name of the image file which should be inside the specified `--images` dir
+- `w`: the width of the image
+- `h`: the height of the image
+- `objects`: json array of the object info. Each object should have the following key-values:
+    - `rect`: json object which contains the 4 coordinates that specify the bounding box
+        - `y1`: y coordinate of one horizontal edge of the box
+        - `y2`: y coordinate of the opposite horizontal edge of the box
+        - `x1`: x coordinate of one vertical edge of the box
+        - `x2`: x coordinate of the opposite vertical edge of the box
+    - `name`: the label for this box
+
+Here is an example:
+```
+{"imgfile": "0313.png", "w": 640, "h": 480, "objects": [{"rect": {"y1": 4, "y2": 144, "x1": 385, "x2": 587}, "name": "face"}]}
+```
+
+# Notice
+
+The json parser compares `x1` with `x2` and `y1` with `y2` and swaps them when the first value is larger, so the two values of each pair may be given in either order.
diff --git a/flow b/flow
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 
 import sys
 from darkflow.cli import cliHandler