implement logger jm v8

NINAnor · Jun 20, 2024 · 2a2421a · 2a2421a
1 parent a45f829
commit 2a2421a
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 9 deletions.
diff --git a/wizard/parsers/gps/jm.py b/wizard/parsers/gps/jm.py
@@ -1,18 +1,20 @@
 import io
 import csv
+import re
 
 import pandas as pd
 
 from parsers.parser_base import Parser, Parsable
 from parsers.helpers import stream_chunk_contains
 
-class GPS2JMParser(Parser):
+class GPS2JMParser7_5(Parser):
     '''
-    Parser for 2Jm format
+    Parser for 2Jm format v 7.5
     '''
     DATATYPE = "gps_2jm"
     # TODO: define fields
     FIELDS = [str(x) for x in range(0,13)]
+    VERSION = "v7.5"
     SEPARATOR = " "
     ENDINGS = [
         "[EOF]",
@@ -42,24 +44,36 @@ class GPS2JMParser(Parser):
     #     "ring_nr": None,
     #     "trip_nr": None,
     # }
+
+    def _fix_content(self, data):  # hook: clean-up applied to the data block before CSV parsing
+        return data  # no-op here; subclasses override it
 
     def __init__(self, parsable: Parsable):
         super().__init__(parsable)
 
-        with self.file.get_stream(binary=False) as stream:
+        with self.file.get_stream(binary=False, errors='backslashreplace') as stream:
+            # TODO: check the first byte instead of the whole stream chunk
             if not stream.seekable():
                 self._raise_not_supported('Stream not seekable')
 
             if not stream_chunk_contains(stream, 30, "2JmGPS-LOG"):
                 self._raise_not_supported(f"Stream must start with 2JmGPS-LOG")
 
-            groups = stream.read().split('\n\n')[1:]
+            groups = stream.read().split('\n\n')
+            head = groups.pop(0)
+
+            if self.VERSION not in head:
+                self._raise_not_supported(f"Version not supported")
+
             data = None
             for group in groups:
                 if group in self.ENDINGS:
                     break
                 data = group
 
+
+            data = self._fix_content(data)
+
             content = io.StringIO(data)
 
             reader = csv.reader(content, delimiter=self.SEPARATOR, skipinitialspace=True)
@@ -69,6 +83,24 @@ def __init__(self, parsable: Parsable):
 
             self.data = pd.read_csv(content, header=0, names=self.FIELDS, sep=self.SEPARATOR, index_col=False)
 
+
+# Collapse runs of spaces/tabs only: `\s` also matched '\n' and merged rows.
+regex = re.compile(r'[ \t]{2,}')
+
+class GPS2JMParser8(GPS2JMParser7_5):
+    '''Parser for 2Jm format v 8.'''
+    VERSION = "v8"
+
+    def _fix_content(self, data: str) -> str:
+        '''
+        Version 8 also uses whitespace to right align the numbers of a
+        specific column, so fields can be separated by several spaces.
+        Return data with each run of spaces/tabs collapsed to a single
+        space; line breaks are kept so every record stays on its row.
+        '''
+        return regex.sub(' ', data)
+
 PARSERS = [
-    GPS2JMParser,
+    GPS2JMParser7_5,
+    GPS2JMParser8,
 ]
diff --git a/wizard/parsers/helpers.py b/wizard/parsers/helpers.py
@@ -18,7 +18,6 @@ def stream_chunk_match(stream, length, text):
     position = stream.tell()
     chunk = stream.read(length)
     stream.seek(position)
-    print(chunk)
     return re.search(text, chunk)
 
 

diff --git a/wizard/parsers/parser_base.py b/wizard/parsers/parser_base.py
@@ -25,10 +25,11 @@ def __init__(self, file_path: pathlib.Path) -> None:
         self.encoding = self._detect_encoding()
 
     @contextmanager
-    def get_stream(self, binary=False):
+    def get_stream(self, binary=False, errors="strict"):
         params = {
             'mode': 'rb' if binary else 'r',
-            'encoding': None if binary else self.encoding
+            'encoding': None if binary else self.encoding,
+            'errors': errors if not binary else None,
         }
         stream = open(self._file_path, **params)
         yield stream
@@ -41,7 +42,7 @@ def _detect_encoding(self):
                 detector.feed(line)
                 if detector.done: break
             detector.close()
-            logging.debug(detector.result)
+            print(detector.result)
             return detector.result['encoding']