From 4ad3ef356dfde511653555bdf39e99220c9084d4 Mon Sep 17 00:00:00 2001 From: MarsCheng <1622677425@qq.com> Date: Mon, 29 Jul 2024 11:39:09 +0800 Subject: [PATCH] =?UTF-8?q?feat(data):=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=88=86=E7=BB=84=E5=B0=81=E8=A3=85=EF=BC=8C=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E5=85=83=E6=95=B0=E6=8D=AE=E7=94=9F=E6=88=90=E3=80=81=E5=A4=9A?= =?UTF-8?q?=E7=BB=84=E6=95=B0=E6=8D=AE=E7=94=9F=E6=88=90=E5=92=8C=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E7=82=B9=E8=8C=83=E5=9B=B4=E7=A1=AE=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cyaron/__init__.py | 1 + cyaron/data.py | 201 ++++++++++++++++++++++++++++++++++++++++++ examples/test_data.py | 25 ++++++ 3 files changed, 227 insertions(+) create mode 100644 cyaron/data.py create mode 100644 examples/test_data.py diff --git a/cyaron/__init__.py b/cyaron/__init__.py index c9b81a1..b522a27 100644 --- a/cyaron/__init__.py +++ b/cyaron/__init__.py @@ -18,4 +18,5 @@ from .merger import Merger #from .visual import visualize from . import log +from .data import Data from random import randint, randrange, uniform, choice, random diff --git a/cyaron/data.py b/cyaron/data.py new file mode 100644 index 0000000..2c3cedc --- /dev/null +++ b/cyaron/data.py @@ -0,0 +1,201 @@ +import random +from queue import PriorityQueue +from typing import Union, Sequence, Tuple + +from cyaron import IO + + +def _check_defaults_format(defaults): + for sub in defaults: + for args in sub: + if not isinstance(args, Sequence) or len(args) != 2: + return False + name, val = args + if not isinstance(name, str) or not isinstance(val, int): + return False + return True + + +class Data: + def __init__( + self, + name: str, + subtask: int = 1, + num: Union[int, Sequence[int]] = 10, + checkpoints: Union[int, Sequence[int]] = 0, + args: Sequence[str] = None, + static: Union[Sequence[Tuple[str, int]], Sequence[Sequence[Tuple[str, int]]]] = None, + relations: Sequence[Sequence[str]] = None, + out_gen: str = None, + ): + r""" + Use it as a iter.In each iteration, + it will return a IO object and some basic args of the checkpoints + in a tuple like (io, a, b, n, m). + For example,it can decide the `n` and `m` of the checkpoint, + and you can use the IO object to write more infomation like a map with `n` points. + + Args: + name: the name of this data, + output file name will be "`{name}{index}.in`" and "`name{index}.out`" + subtask: the subtask number of this data, default is 1 + num: the number of this data,or the number of each subtask + checkpoints: number for multi checkpoint,or number of each subtask + args: args' name like `n`,`m` for each group + static: static args,or static args for each subtask + relations: relations between args, + each relation is a list of args mean [0] < [1] < [2]..., + if [0] <= [1], you can insert '=' between them. + For example ['a', '=', 'b', 'c'] mean `a <= b < c` + out_gen: the shell command to start the output generator,(None to disable) + + Example: + + Have no subtask.Need 5 data.No multi checkpoint. + 0 <= l < mid <= r < 20 + + >>> Data( + >>> name="test", + >>> num=5, + >>> static=[('L', 0), ('R', 20)], + >>> relations=[["L", "=", "l", "mid", "=", "r", "R"]] + >>> ) + + + Have 3 subtask,each subtask has 5 data.Each data has 5 checkpoint. + For first subtask,0 < n < 20. + For second subtask,20 < n < 40.For third subtask,40 < n < 60. + + >>> Data( + >>> name="test", + >>> subtask=3, + >>> num=[5, 5, 5], + >>> checkpoints=[5, 5, 5], + >>> static=[[('MIN', 0), ('MAX', 20)], + >>> [('MIN', 20), ('MAX', 40)], + >>> [('MIN', 40), ('MAX', 60)]], + >>> relations=[["MIN", "n", "MAX"]] + >>> ) + + You can find a real example in the `examples` dir + """ + self.name = name + self.subtask = subtask + self.num = num if isinstance(num, Sequence) else [num] * self.subtask + self.checkpoints = checkpoints if isinstance(checkpoints, Sequence) \ + else [checkpoints] * self.subtask + self.args = args if args is not None else [] + if static is None: + self.static = [[]] * self.subtask + elif _check_defaults_format(static): + self.static = static + elif _check_defaults_format([static]): + self.static = [static] * self.subtask + else: + raise ValueError("static args' format is wrong") + self.relations = relations if relations is not None else [] + self.relation_map = self._compile_args() + + if len(self.num) != self.subtask: + raise ValueError("num's length must be equal to subtask") + if len(self.checkpoints) != self.subtask: + raise ValueError("checkpoints' length must be equal to subtask") + if len(self.static) != self.subtask: + raise ValueError("static's length must be equal to subtask") + + self.index = 1 + self.subtask_index = 1 + self.point_index = 1 + self.checkpoint_index = 1 + self.io = None + self.out_gen = out_gen + + def _compile_args(self): + relation_map = {} + for rel in self.relations: + can_equal = False + last = None + for ele in rel: + if last is None: + if ele != "=": + last = ele + continue + if ele == "=": + can_equal = True + continue + if ele not in relation_map: + relation_map[ele] = [] + if last not in relation_map: + relation_map[last] = [] + relation_map[ele].append((last, can_equal, 1)) + relation_map[last].append((ele, can_equal, 0)) + last = ele + can_equal = False + return relation_map + + def _gen_args(self) -> Tuple: + max_val = PriorityQueue() + min_val = PriorityQueue() + for name, val in self.static[self.subtask_index - 1]: + max_val.put((val, name)) + min_val.put((-val, name)) + args = set(self.args) + values = {} + max_d = {} + min_d = {} + while args: + while not max_val.empty(): + val, name = max_val.get() + for ele, can_equal, d in self.relation_map[name]: + if d != 1: + continue + new_val = val - (0 if can_equal else 1) + if ele not in max_d or new_val < max_d[ele]: + max_d[ele] = new_val + max_val.put((new_val, ele)) + while not min_val.empty(): + val, name = min_val.get() + for ele, can_equal, d in self.relation_map[name]: + if d != 0: + continue + new_val = -val + (0 if can_equal else 1) + if ele not in min_d or new_val > min_d[ele]: + min_d[ele] = new_val + min_val.put((-new_val, ele)) + for name in args: + if name in max_d and name in min_d: + values[name] = random.randint(min_d[name], max_d[name]) + max_val.put((values[name], name)) + min_val.put((-values[name], name)) + break + else: + print(min_d, max_d) + raise RuntimeError("Relations too less to generate args") + args.remove(name) + return tuple(values[i] for i in self.args) + + def __iter__(self): + return self + + def __next__(self): + if self.subtask_index > self.subtask: + raise StopIteration + if self.checkpoint_index == 1: + if self.io is not None and self.out_gen is not None: + self.io.output_gen(self.out_gen) + self.io = IO(file_prefix=self.name, data_id=self.index) + checkpoint = self.checkpoints[self.subtask_index - 1] + if checkpoint > 0: + self.io.input_writeln(checkpoint) + args_value = self._gen_args() + self.io.input_writeln(args_value) + self.checkpoint_index += 1 + + if self.checkpoint_index > self.checkpoints[self.subtask_index - 1]: + self.point_index += 1 + self.index += 1 + self.checkpoint_index = 1 + if self.point_index > self.num[self.subtask_index - 1]: + self.point_index = 1 + self.subtask_index += 1 + return (self.io,) + args_value diff --git a/examples/test_data.py b/examples/test_data.py new file mode 100644 index 0000000..a5655e3 --- /dev/null +++ b/examples/test_data.py @@ -0,0 +1,25 @@ +from cyaron import Data, Vector +from cyaron.vector import VectorRandomMode + +# Problem: +# Given a sequence with n element a_1 to a_n. +# You should find the longest substring +# that sum of it don't bigger than m and +# length of it don't bigger than k. + +# For 20% of data, 1 <= k <= n < 100, 0 <= m <= 1000 +# For 100% of data, 1 <= k <= n < 1000, 0 <= m <= 1000000, 0 <= a_i <= 1000000 +# Output: The sum of the longest substring. + +for io, n, m, k in Data( + "test", + subtask=2, + num=[4, 16], + args=["n", "m", "k"], + static=[[("MIN", 1), ("MAX", 100), ("MMIN", 0), ("MMAX", 1000)], + [("MIN", 100), ("MAX", 1000), ("MMIN", 1000), ("MMAX", 1000000)]], + relations=[["MIN", "=", "k", "=", "n", "MAX"], ["MMIN", "=", "m", "=", "MMAX"]], + # out_gen="path to right program" +): + io.input_writeln(sum(Vector.random(n, [(0, 10000000)], + VectorRandomMode.unique), []))