From aa884076fe087c2cf782254b4caaa78cee58e76a Mon Sep 17 00:00:00 2001
From: Evan Hubinger
Date: Wed, 2 Aug 2023 22:47:16 -0700
Subject: [PATCH] Further fix --incremental

---
Review notes (text in this position is ignored by `git am` and is not part
of the commit message):
 * coconut/command/command.py: the large-file warning now calls
   .format(codepath=codepath); without it the literal text "{codepath!r}"
   was logged instead of the offending path.
 * coconut/compiler/util.py: should_clear_cache() now ends with
   `return not SUPPORTS_INCREMENTAL`, so the packrat cache is still cleared
   after each parse when incremental parsing is unavailable
   (coconut/constants.py states that packrat_cache_size = None only works
   because final() clears the cache).
 * Line structure was restored from a whitespace-collapsed copy of this
   patch; indentation and blank context lines were inferred and checked
   against the hunk line counts, which are unchanged.

 Makefile                      |  2 +-
 coconut/command/command.py    | 21 +++++++-----
 coconut/compiler/compiler.py  | 14 ++++----
 coconut/compiler/util.py      | 63 +++++++++++++++++++++++------------
 coconut/constants.py          |  8 +++--
 coconut/integrations.py       |  2 +-
 coconut/root.py               |  2 +-
 coconut/tests/main_test.py    |  2 ++
 coconut/tests/src/extras.coco |  5 ++-
 9 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/Makefile b/Makefile
index 1f789a68d..bf2008bc1 100644
--- a/Makefile
+++ b/Makefile
@@ -152,7 +152,7 @@ test-mypy-tests: clean-no-tests
 	python ./coconut/tests/dest/extras.py
 
 # same as test-univ but includes verbose output for better debugging
-# regex for getting non-timing lines: ^(?!Time|\s+Packrat|Loaded|Saving).*
+# regex for getting non-timing lines: ^(?!Time|\s+Packrat|Loaded|Saving)[^\n]*
 .PHONY: test-verbose
 test-verbose: export COCONUT_USE_COLOR=TRUE
 test-verbose: clean
diff --git a/coconut/command/command.py b/coconut/command/command.py
index 010a1ddb8..480a31ef5 100644
--- a/coconut/command/command.py
+++ b/coconut/command/command.py
@@ -73,6 +73,7 @@
     coconut_cache_dir,
     coconut_run_kwargs,
     interpreter_uses_incremental,
+    disable_incremental_for_len,
 )
 from coconut.util import (
     univ_open,
@@ -603,13 +604,16 @@ def callback(compiled):
                 filename=os.path.basename(codepath),
             )
             if self.incremental:
-                code_dir, code_fname = os.path.split(codepath)
+                if disable_incremental_for_len is not None and len(code) > disable_incremental_for_len:
+                    logger.warn("--incremental mode is not currently supported for files as large as {codepath!r}".format(codepath=codepath))
+                else:
+                    code_dir, code_fname = os.path.split(codepath)
 
-                cache_dir = os.path.join(code_dir, coconut_cache_dir)
-                ensure_dir(cache_dir)
+                    cache_dir = os.path.join(code_dir, coconut_cache_dir)
+                    ensure_dir(cache_dir)
 
-                pickle_fname = code_fname + ".pickle"
-                parse_kwargs["incremental_cache_filename"] = os.path.join(cache_dir, pickle_fname)
+                    pickle_fname = code_fname + ".pickle"
+                    parse_kwargs["incremental_cache_filename"] = os.path.join(cache_dir, pickle_fname)
 
             if package is True:
                 self.submit_comp_job(codepath, callback, "parse_package", code, package_level=package_level, **parse_kwargs)
@@ -822,9 +826,10 @@ def execute(self, compiled=None, path=None, use_eval=False, allow_show=True):
             if path is None:  # header is not included
                 if not self.mypy:
                     no_str_code = self.comp.remove_strs(compiled)
-                    result = mypy_builtin_regex.search(no_str_code)
-                    if result:
-                        logger.warn("found mypy-only built-in " + repr(result.group(0)) + "; pass --mypy to use mypy-only built-ins at the interpreter")
+                    if no_str_code is not None:
+                        result = mypy_builtin_regex.search(no_str_code)
+                        if result:
+                            logger.warn("found mypy-only built-in " + repr(result.group(0)) + "; pass --mypy to use mypy-only built-ins at the interpreter")
             else:  # header is included
                 compiled = rem_encoding(compiled)
 
diff --git a/coconut/compiler/compiler.py b/coconut/compiler/compiler.py
index 46e7c532b..4889b9f97 100644
--- a/coconut/compiler/compiler.py
+++ b/coconut/compiler/compiler.py
@@ -934,12 +934,14 @@ def complain_on_err(self):
         except CoconutException as err:
             complain(err)
 
-    def remove_strs(self, inputstring, inner_environment=True):
-        """Remove strings/comments from the given input."""
-        with self.complain_on_err():
+    def remove_strs(self, inputstring, inner_environment=True, **kwargs):
+        """Remove strings/comments from the given input if possible."""
+        try:
             with (self.inner_environment() if inner_environment else noop_ctx()):
-                return self.str_proc(inputstring)
-        return inputstring
+                return self.str_proc(inputstring, **kwargs)
+        except Exception:
+            logger.log_exc()
+        return None
 
     def get_matcher(self, original, loc, check_var, name_list=None):
         """Get a Matcher object."""
@@ -1213,7 +1215,7 @@ def parsing(self, keep_state=False, filename=None):
 
     def streamline(self, grammar, inputstring="", force=False):
         """Streamline the given grammar for the given inputstring."""
-        if force or (streamline_grammar_for_len is not None and len(inputstring) >= streamline_grammar_for_len):
+        if force or (streamline_grammar_for_len is not None and len(inputstring) > streamline_grammar_for_len):
             start_time = get_clock_time()
             prep_grammar(grammar, streamline=True)
             logger.log_lambda(
diff --git a/coconut/compiler/util.py b/coconut/compiler/util.py
index 83df71384..8ff8a3279 100644
--- a/coconut/compiler/util.py
+++ b/coconut/compiler/util.py
@@ -66,8 +66,9 @@
     ParserElement,
     _trim_arity,
     _ParseResultsWithOffset,
-    line as _line,
     all_parse_elements,
+    line as _line,
+    __version__ as pyparsing_version,
 )
 
 from coconut.integrations import embed
@@ -111,6 +112,7 @@
     py_vers_with_eols,
     unwrapper,
     incremental_cache_limit,
+    incremental_mode_cache_successes,
 )
 from coconut.exceptions import (
     CoconutException,
@@ -356,26 +358,9 @@ def attach(item, action, ignore_no_tokens=None, ignore_one_token=None, ignore_to
     return add_action(item, action, make_copy)
 
 
-def should_clear_cache():
-    """Determine if we should be clearing the packrat cache."""
-    if not ParserElement._packratEnabled:
-        internal_assert(not ParserElement._incrementalEnabled)
-        return False
-    if not ParserElement._incrementalEnabled:
-        return True
-    if ParserElement._incrementalWithResets and repeatedly_clear_incremental_cache:
-        return True
-    if incremental_cache_limit is not None and len(ParserElement.packrat_cache) > incremental_cache_limit:
-        return True
-    return False
-
-
 def final_evaluate_tokens(tokens):
     """Same as evaluate_tokens but should only be used once a parse is assured."""
-    # don't clear the cache in incremental mode
-    if should_clear_cache():
-        # clear cache without resetting stats
-        ParserElement.packrat_cache.clear()
+    clear_packrat_cache()
     return evaluate_tokens(tokens)
 
 
@@ -537,6 +522,39 @@ def get_pyparsing_cache():
         return {}
 
 
+def should_clear_cache():
+    """Determine if we should be clearing the packrat cache."""
+    if not ParserElement._packratEnabled:
+        return False
+    if SUPPORTS_INCREMENTAL:
+        if not ParserElement._incrementalEnabled:
+            return True
+        if ParserElement._incrementalWithResets and repeatedly_clear_incremental_cache:
+            return True
+        if incremental_cache_limit is not None and len(ParserElement.packrat_cache) > incremental_cache_limit:
+            # only clear the second half of the cache, since the first
+            # half is what will help us next time we recompile
+            return "second half"
+    return not SUPPORTS_INCREMENTAL  # without incremental support, always clear after a parse
+
+
+def clear_packrat_cache():
+    """Clear the packrat cache if applicable."""
+    clear_cache = should_clear_cache()
+    if not clear_cache:
+        return
+    if clear_cache == "second half":
+        cache_items = list(get_pyparsing_cache().items())
+        restore_items = cache_items[:len(cache_items) // 2]
+    else:
+        restore_items = ()
+    # clear cache without resetting stats
+    ParserElement.packrat_cache.clear()
+    # restore any items we want to keep
+    for lookup, value in restore_items:
+        ParserElement.packrat_cache.set(lookup, value)
+
+
 def get_cache_items_for(original):
     """Get items from the pyparsing cache filtered to only from parsing original."""
     cache = get_pyparsing_cache()
@@ -561,6 +579,7 @@ def enable_incremental_parsing():
     """Enable incremental parsing mode where prefix/suffix parses are reused."""
     if not SUPPORTS_INCREMENTAL:
         return False
+    ParserElement._should_cache_incremental_success = incremental_mode_cache_successes
     if ParserElement._incrementalEnabled and not ParserElement._incrementalWithResets:  # incremental mode is already enabled
         return True
     ParserElement._incrementalEnabled = False
@@ -599,6 +618,7 @@ def pickle_incremental_cache(original, filename, protocol=pickle.HIGHEST_PROTOCO
         ))
     pickle_info_obj = {
         "VERSION": VERSION,
+        "pyparsing_version": pyparsing_version,
         "pickleable_cache_items": pickleable_cache_items,
     }
     with univ_open(filename, "wb") as pickle_file:
@@ -617,7 +637,7 @@ def unpickle_incremental_cache(filename):
     except Exception:
         logger.log_exc()
         return False
-    if pickle_info_obj["VERSION"] != VERSION:
+    if pickle_info_obj["VERSION"] != VERSION or pickle_info_obj["pyparsing_version"] != pyparsing_version:
         return False
     pickleable_cache_items = pickle_info_obj["pickleable_cache_items"]
 
@@ -633,10 +653,9 @@ def unpickle_incremental_cache(filename):
     if max_cache_size != float("inf"):
         pickleable_cache_items = pickleable_cache_items[-max_cache_size:]
 
-    packrat_cache = ParserElement.packrat_cache
     for pickleable_lookup, value in pickleable_cache_items:
         lookup = (all_parse_elements[pickleable_lookup[0]],) + pickleable_lookup[1:]
-        packrat_cache.set(lookup, value)
+        ParserElement.packrat_cache.set(lookup, value)
     return True
 
 
diff --git a/coconut/constants.py b/coconut/constants.py
index aee27380b..84d7a3d3b 100644
--- a/coconut/constants.py
+++ b/coconut/constants.py
@@ -120,7 +120,8 @@ def get_path_env_var(env_var, default):
 
 # below constants are experimentally determined to maximize performance
 
-streamline_grammar_for_len = 4000
+streamline_grammar_for_len = 4096
+disable_incremental_for_len = streamline_grammar_for_len  # disables --incremental
 
 use_packrat_parser = True  # True also gives us better error messages
 packrat_cache_size = None  # only works because final() clears the cache
@@ -135,7 +136,8 @@ def get_path_env_var(env_var, default):
 
 # this is what gets used in compiler.util.enable_incremental_parsing()
 incremental_mode_cache_size = None
-incremental_cache_limit = 524288  # clear cache when it gets this large
+incremental_cache_limit = 1048576  # clear cache when it gets this large
+incremental_mode_cache_successes = False
 
 use_left_recursion_if_available = False
 
@@ -961,7 +963,7 @@ def get_path_env_var(env_var, default):
 
 # min versions are inclusive
 min_versions = {
-    "cPyparsing": (2, 4, 7, 2, 2, 2),
+    "cPyparsing": (2, 4, 7, 2, 2, 3),
     ("pre-commit", "py3"): (3,),
     ("psutil", "py>=27"): (5,),
     "jupyter": (1, 0),
diff --git a/coconut/integrations.py b/coconut/integrations.py
index 8d2fec811..f2a3537ee 100644
--- a/coconut/integrations.py
+++ b/coconut/integrations.py
@@ -173,7 +173,7 @@ def new_ctxvisit(self, ctxtransformer, node, inp, ctx, mode="exec", *args, **kwa
                 # we handle our own inner_environment rather than have remove_strs do it so that we can reformat
                 with self.compiler.inner_environment():
                     line_no_strs = self.compiler.remove_strs(line, inner_environment=False)
-                    if ";" in line_no_strs:
+                    if line_no_strs is not None and ";" in line_no_strs:
                         remaining_pieces = [
                             self.compiler.reformat(piece, ignore_errors=True)
                             for piece in line_no_strs.split(";")
diff --git a/coconut/root.py b/coconut/root.py
index 714a7124a..d184e8e2c 100644
--- a/coconut/root.py
+++ b/coconut/root.py
@@ -26,7 +26,7 @@
 VERSION = "3.0.3"
 VERSION_NAME = None
 # False for release, int >= 1 for develop
-DEVELOP = 4
+DEVELOP = 5
 ALPHA = False  # for pre releases rather than post releases
 
 assert DEVELOP is False or DEVELOP >= 1, "DEVELOP must be False or an int >= 1"
diff --git a/coconut/tests/main_test.py b/coconut/tests/main_test.py
index 7429d46e2..50cf27d86 100644
--- a/coconut/tests/main_test.py
+++ b/coconut/tests/main_test.py
@@ -812,6 +812,8 @@ def test_xontrib(self):
                 p.sendline('echo f"{$ENV_VAR}"; echo f"{$ENV_VAR}"')
                 p.expect("ABC")
                 p.expect("ABC")
+                p.sendline('len("""1\n3\n5""")')
+                p.expect("5")
                 if not PYPY or PY39:
                     if PY36:
                         p.sendline("echo 123;; 123")
diff --git a/coconut/tests/src/extras.coco b/coconut/tests/src/extras.coco
index c3d7d96d2..79de0f5f7 100644
--- a/coconut/tests/src/extras.coco
+++ b/coconut/tests/src/extras.coco
@@ -325,7 +325,10 @@ line 6''')
     assert_raises(-> parse("a=1;"), CoconutStyleError, err_has="\n ^")
     assert_raises(-> parse("class derp(object)"), CoconutStyleError)
     assert_raises(-> parse("def f(a.b) = True"), CoconutStyleError, err_has="\n ^")
-    assert_raises(-> parse("match def kwd_only_x_is_int_def_0(*, x is int = 0) = x"), CoconutStyleError, err_has="\n ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|")
+    assert_raises(-> parse("match def kwd_only_x_is_int_def_0(*, x is int = 0) = x"), CoconutStyleError, err_has=(
+        "\n ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|",
+        "\n ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~/",
+    ))
     try:
         parse("""
 try: