Skip to content

Commit

Permalink
implement logger jm v8
Browse files Browse the repository at this point in the history
  • Loading branch information
nicokant committed Jun 20, 2024
1 parent a45f829 commit 2a2421a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 9 deletions.
42 changes: 37 additions & 5 deletions wizard/parsers/gps/jm.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import io
import csv
import re

import pandas as pd

from parsers.parser_base import Parser, Parsable
from parsers.helpers import stream_chunk_contains

class GPS2JMParser(Parser):
class GPS2JMParser7_5(Parser):
'''
Parser for 2Jm format
Parser for 2Jm format v 7.5
'''
DATATYPE = "gps_2jm"
# TODO: define fields
FIELDS = [str(x) for x in range(0,13)]
VERSION = "v7.5"
SEPARATOR = " "
ENDINGS = [
"[EOF]",
Expand Down Expand Up @@ -42,24 +44,36 @@ class GPS2JMParser(Parser):
# "ring_nr": None,
# "trip_nr": None,
# }

def _fix_content(self, data):
return data

def __init__(self, parsable: Parsable):
super().__init__(parsable)

with self.file.get_stream(binary=False) as stream:
with self.file.get_stream(binary=False, errors='backslashreplace') as stream:
# TODO: check the first byte instead of the whole stream chunk
if not stream.seekable():
self._raise_not_supported('Stream not seekable')

if not stream_chunk_contains(stream, 30, "2JmGPS-LOG"):
self._raise_not_supported(f"Stream must start with 2JmGPS-LOG")

groups = stream.read().split('\n\n')[1:]
groups = stream.read().split('\n\n')
head = groups.pop(0)

if self.VERSION not in head:
self._raise_not_supported(f"Version not supported")

data = None
for group in groups:
if group in self.ENDINGS:
break
data = group


data = self._fix_content(data)

content = io.StringIO(data)

reader = csv.reader(content, delimiter=self.SEPARATOR, skipinitialspace=True)
Expand All @@ -69,6 +83,24 @@ def __init__(self, parsable: Parsable):

self.data = pd.read_csv(content, header=0, names=self.FIELDS, sep=self.SEPARATOR, index_col=False)


# Collapses runs of horizontal whitespace (spaces/tabs) only.
# The character class deliberately excludes newlines: `\s` also matches "\n",
# so alignment padding next to a line break would be swallowed together with
# the break and two records would be merged into one row.
# The run length is unbounded so that padding wider than 10 characters cannot
# leave a double separator behind.
regex = re.compile(r'[ \t]{2,}')


class GPS2JMParser8(GPS2JMParser7_5):
    '''
    Parser for 2Jm format v 8
    '''
    VERSION = "v8"

    def _fix_content(self, data: str):
        '''
        In version 8 whitespace is also used to right align the numbers
        of a specific column, so a single field separator can show up as
        several consecutive spaces.

        Every run of two or more spaces/tabs is replaced with one space.
        Line breaks are left untouched so each record stays on its own line.

        data: text of the data group, or None when no group was found.
        Returns the normalised text; empty or None input is returned
        unchanged, as the base class hook does.
        '''
        if not data:
            # Nothing to normalise; avoids a TypeError from regex.sub(None)
            return data

        return regex.sub(' ', data)

# Parser classes exposed by this module (presumably tried in turn by the
# caller until one accepts the file — verify against the loader).
# NOTE(review): GPS2JMParser looks like the pre-rename name of
# GPS2JMParser7_5 in this diff view — confirm it is gone from the final file.
PARSERS = [
GPS2JMParser,
GPS2JMParser7_5,
GPS2JMParser8,
]
1 change: 0 additions & 1 deletion wizard/parsers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def stream_chunk_match(stream, length, text):
position = stream.tell()
chunk = stream.read(length)
stream.seek(position)
print(chunk)
return re.search(text, chunk)


Expand Down
7 changes: 4 additions & 3 deletions wizard/parsers/parser_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ def __init__(self, file_path: pathlib.Path) -> None:
self.encoding = self._detect_encoding()

@contextmanager
def get_stream(self, binary=False):
def get_stream(self, binary=False, errors="strict"):
params = {
'mode': 'rb' if binary else 'r',
'encoding': None if binary else self.encoding
'encoding': None if binary else self.encoding,
'errors': errors if not binary else None,
}
stream = open(self._file_path, **params)
yield stream
Expand All @@ -41,7 +42,7 @@ def _detect_encoding(self):
detector.feed(line)
if detector.done: break
detector.close()
logging.debug(detector.result)
print(detector.result)
return detector.result['encoding']


Expand Down

0 comments on commit 2a2421a

Please sign in to comment.