Changes to multi process single archive file input (#3384)
joachimmetz committed Jan 1, 2021
1 parent dc9629b commit 654f3b1
Showing 4 changed files with 64 additions and 10 deletions.
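In effect, a single-file source no longer forces single-process extraction when archive processing is enabled and the file is a supported archive (a bzip2, gzip or xz compressed stream, or an archive format dfVFS recognizes, such as TAR or ZIP). A minimal standalone sketch of the decision the tools now make; the parameter names are illustrative stand-ins for self._single_process_mode, self._process_archives and the new _IsArchiveFile() helper, not part of the commit:

    def determine_single_process_mode(
        single_process_flag, source_is_single_file, process_archives,
        is_archive_file):
      # Start from the tool's single process mode setting.
      single_process_mode = single_process_flag
      if source_is_single_file:
        # Only an archive whose members will also be extracted benefits from
        # multiple worker processes; any other single file stays single process.
        if not process_archives or not is_archive_file:
          single_process_mode = True
      return single_process_mode
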
plaso/cli/extraction_tool.py (51 additions & 0 deletions)
@@ -6,7 +6,9 @@
 import os
 import pytz
 
+from dfvfs.analyzer import analyzer as dfvfs_analyzer
 from dfvfs.lib import definitions as dfvfs_definitions
+from dfvfs.path import factory as path_spec_factory
 from dfvfs.resolver import context as dfvfs_context
 
 # The following import makes sure the analyzers are registered.
@@ -166,6 +168,55 @@ def _CreateProcessingConfiguration(self, knowledge_base):

     return configuration
 
+  def _IsArchiveFile(self, path_spec):
+    """Determines if a path specification references an archive file.
+
+    Args:
+      path_spec (dfvfs.PathSpec): path specification of the data stream.
+
+    Returns:
+      bool: True if the path specification references a supported archive file
+          format, False otherwise.
+    """
+    try:
+      type_indicators = (
+          dfvfs_analyzer.Analyzer.GetCompressedStreamTypeIndicators(
+              path_spec, resolver_context=self._resolver_context))
+    except IOError:
+      type_indicators = []
+
+    if len(type_indicators) > 1:
+      return False
+
+    if type_indicators:
+      type_indicator = type_indicators[0]
+    else:
+      type_indicator = None
+
+    if type_indicator == dfvfs_definitions.TYPE_INDICATOR_BZIP2:
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
+          compression_method=dfvfs_definitions.COMPRESSION_METHOD_BZIP2,
+          parent=path_spec)
+
+    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_GZIP:
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_GZIP, parent=path_spec)
+
+    elif type_indicator == dfvfs_definitions.TYPE_INDICATOR_XZ:
+      path_spec = path_spec_factory.Factory.NewPathSpec(
+          dfvfs_definitions.TYPE_INDICATOR_COMPRESSED_STREAM,
+          compression_method=dfvfs_definitions.COMPRESSION_METHOD_XZ,
+          parent=path_spec)
+
+    try:
+      type_indicators = dfvfs_analyzer.Analyzer.GetArchiveTypeIndicators(
+          path_spec, resolver_context=self._resolver_context)
+    except IOError:
+      type_indicators = []
+
+    return bool(type_indicators)
+
   def _ParsePerformanceOptions(self, options):
     """Parses the performance options.
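
For illustration, a hedged usage sketch of the new helper: it first asks dfVFS whether the data stream is a bzip2, gzip or xz compressed stream, wraps the path specification accordingly, and then checks for archive type indicators (for example TAR or ZIP). The tool instance and the source path below are assumptions for the example, not part of the commit:

    from dfvfs.lib import definitions as dfvfs_definitions
    from dfvfs.path import factory as path_spec_factory

    # Path specification for a local file; the location is illustrative.
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/evidence.tar.gz')

    # extraction_tool is assumed to be an instance of a CLI tool that uses
    # ExtractionTool, with its dfVFS resolver context already set up.
    if extraction_tool._IsArchiveFile(path_spec):
      print('Source is a supported archive; multi processing can be used.')
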
plaso/cli/log2timeline_tool.py (5 additions & 4 deletions)
@@ -388,13 +388,14 @@ def ExtractEventsFromSources(self):
     storage_writer = storage_factory.StorageFactory.CreateStorageWriter(
         self._storage_format, session, self._storage_file_path)
     if not storage_writer:
-      raise errors.BadConfigOption(
-          'Unsupported storage format: {0:s}'.format(self._storage_format))
+      raise errors.BadConfigOption('Unsupported storage format: {0:s}'.format(
+          self._storage_format))
 
     single_process_mode = self._single_process_mode
     if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
-      # No need to multi process a single file source.
-      single_process_mode = True
+      if not self._process_archives or not self._IsArchiveFile(
+          self._source_path_specs[0]):
+        single_process_mode = True
 
     if single_process_mode:
       extraction_engine = single_process_engine.SingleProcessEngine()
plaso/cli/psteal_tool.py (3 additions & 2 deletions)
@@ -337,8 +337,9 @@ def ExtractEventsFromSources(self):

     single_process_mode = self._single_process_mode
     if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
-      # No need to multi process a single file source.
-      single_process_mode = True
+      if not self._process_archives or not self._IsArchiveFile(
+          self._source_path_specs[0]):
+        single_process_mode = True
 
     if single_process_mode:
       extraction_engine = single_process_engine.SingleProcessEngine()
plaso/engine/worker.py (5 additions & 4 deletions)
@@ -8,7 +8,7 @@
 import re
 import time
 
-from dfvfs.analyzer import analyzer
+from dfvfs.analyzer import analyzer as dfvfs_analyzer
 from dfvfs.lib import definitions as dfvfs_definitions
 from dfvfs.lib import errors as dfvfs_errors
 from dfvfs.path import factory as path_spec_factory
@@ -419,7 +419,7 @@ def _GetArchiveTypes(self, mediator, path_spec):
       list[str]: dfVFS archive type indicators found in the data stream.
     """
     try:
-      type_indicators = analyzer.Analyzer.GetArchiveTypeIndicators(
+      type_indicators = dfvfs_analyzer.Analyzer.GetArchiveTypeIndicators(
           path_spec, resolver_context=mediator.resolver_context)
     except IOError as exception:
       type_indicators = []
@@ -444,8 +444,9 @@ def _GetCompressedStreamTypes(self, mediator, path_spec):
           the data stream.
     """
     try:
-      type_indicators = analyzer.Analyzer.GetCompressedStreamTypeIndicators(
-          path_spec, resolver_context=mediator.resolver_context)
+      type_indicators = (
+          dfvfs_analyzer.Analyzer.GetCompressedStreamTypeIndicators(
+              path_spec, resolver_context=mediator.resolver_context))
     except IOError as exception:
       type_indicators = []
 
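
For context, the dfVFS analyzer calls renamed in this file return plain lists of type indicator strings, which is why an empty list is a safe fallback on IOError. A small hedged example outside plaso, with an illustrative local path:

    from dfvfs.analyzer import analyzer as dfvfs_analyzer
    from dfvfs.lib import definitions as dfvfs_definitions
    from dfvfs.path import factory as path_spec_factory
    from dfvfs.resolver import context as dfvfs_context

    resolver_context = dfvfs_context.Context()
    path_spec = path_spec_factory.Factory.NewPathSpec(
        dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/sample.zip')

    # Returns indicators such as dfvfs_definitions.TYPE_INDICATOR_ZIP, or an
    # empty list if the data stream is not a recognized archive.
    type_indicators = dfvfs_analyzer.Analyzer.GetArchiveTypeIndicators(
        path_spec, resolver_context=resolver_context)
    print(type_indicators)
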
