From 3d6caa0c9f704467e4a0d10255cdf6e5c60a508d Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 8 May 2024 07:59:02 +0000 Subject: [PATCH 01/23] Adds support for multigraphs --- grandcypher/__init__.py | 90 +++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 35d9c2a..142b926 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -7,7 +7,7 @@ """ -from typing import Dict, List, Callable, Tuple +from typing import Dict, List, Callable, Tuple, Union from collections import OrderedDict import random import string @@ -119,7 +119,6 @@ | LEFT_ANGLE? "-[" CNAME ":" TYPE "*" MIN_HOP ".." MAX_HOP "]-" RIGHT_ANGLE? - LEFT_ANGLE : "<" RIGHT_ANGLE : ">" EQUAL : "=" @@ -198,27 +197,64 @@ def _is_node_attr_match( @lru_cache() def _is_edge_attr_match( - motif_edge_id: Tuple[str, str], - host_edge_id: Tuple[str, str], - motif: nx.Graph, - host: nx.Graph, + motif_edge_id: Tuple[str, str, Union[int, str]], + host_edge_id: Tuple[str, str, Union[int, str]], + motif: Union[nx.Graph, nx.MultiDiGraph], + host: Union[nx.Graph, nx.MultiDiGraph] ) -> bool: """ - Check if an edge in the host graph matches the attributes in the motif. - This also check the __labels__ of edges. + Check if an edge in the host graph matches the attributes in the motif, + including the special '__labels__' set attribute. This function is adapted + for MultiDiGraphs. Arguments: - motif_edge_id (str): The motif edge ID - host_edge_id (str): The host edge ID - motif (nx.Graph): The motif graph - host (nx.Graph): The host graph + motif_edge_id (Tuple[str, str, Union[int, str]]): The motif edge ID + host_edge_id (Tuple[str, str, Union[int, str]]): The host edge ID + motif (Union[nx.Graph, nx.MultiDiGraph]): The motif graph + host (Union[nx.Graph, nx.MultiDiGraph]): The host graph Returns: bool: True if the host edge matches the attributes in the motif - """ - motif_edge = motif.edges[motif_edge_id] - host_edge = host.edges[host_edge_id] + motif_u, motif_v = motif_edge_id + host_u, host_v = host_edge_id + + # Handle the difference in edge access for Graph and MultiDiGraph + if isinstance(motif, nx.MultiDiGraph): + motif_edges = motif[motif_u][motif_v] + else: + motif_edges = {0: motif[motif_u][motif_v]} # Mock single edge + + if isinstance(host, nx.MultiDiGraph): + host_edges = host[host_u][host_v] + else: + host_edges = {0: host[host_u][host_v]} # Mock single edge + # AtlasView({0: {'__labels__': {'friend'}}, 1: {'__labels__': {'colleague'}}}) + # {'__labels__': {'A'}} // {'bar': '1'} + + # Aggregate all __labels__ into one set + + motif_agg = { + "__labels__": set() + } + for edge_id, motif_attr in motif_edges.items(): + if "__labels__" in motif_attr and motif_attr['__labels__']: + motif_agg["__labels__"].update(motif_attr['__labels__']) + elif "__labels__" not in motif_attr: + motif_agg[edge_id] = motif_attr + + host_agg = { + "__labels__": set() + } + for edge_id, host_attr in host_edges.items(): + if "__labels__" in host_attr and host_attr['__labels__']: + host_agg["__labels__"].update(host_attr['__labels__']) + elif "__labels__" not in host_attr: + host_agg[edge_id] = host_attr + + + motif_edge = motif_agg + host_edge = host_agg for attr, val in motif_edge.items(): if attr == "__labels__": @@ -227,7 +263,7 @@ def _is_edge_attr_match( continue if host_edge.get(attr) != val: return False - + return True @@ -323,7 +359,7 @@ def __init__(self, target_graph: nx.Graph, limit=None): self._target_graph = target_graph self._paths = [] self._where_condition: CONDITION = None - self._motif = nx.DiGraph() + self._motif = nx.MultiDiGraph() #nx.MultiDiGraph() # nx.DiGraph() self._matches = None self._matche_paths = None self._return_requests = [] @@ -385,7 +421,8 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: else: mapping_u, mapping_v = self._return_edges[data_path] # We are looking for an edge mapping in the target graph: - is_hop = self._motif.edges[(mapping_u, mapping_v)]["__is_hop__"] + # import ipdb;ipdb.set_trace() + is_hop = self._motif.edges[(mapping_u, mapping_v, 0)]["__is_hop__"] ret = ( _get_edge( self._target_graph, mapping, match_path, mapping_u, mapping_v @@ -606,7 +643,7 @@ def _is_limit(self, count): # Check if limit reached return self._limit and count >= (self._limit + self._skip) - def _edge_hop_motifs(self, motif: nx.DiGraph) -> List[Tuple[nx.Graph, dict]]: + def _edge_hop_motifs(self, motif: nx.MultiDiGraph) -> List[Tuple[nx.Graph, dict]]: """generate a list of edge-hop-expanded motif with edge-hop-map. Arguments: @@ -618,19 +655,20 @@ def _edge_hop_motifs(self, motif: nx.DiGraph) -> List[Tuple[nx.Graph, dict]]: where a real edge path can have more than 2 element (hop >= 2) or it can have 2 same element (hop = 0). """ - new_motif = nx.DiGraph() + new_motif = nx.MultiDiGraph() for n in motif.nodes: if motif.out_degree(n) == 0 and motif.in_degree(n) == 0: new_motif.add_node(n, **motif.nodes[n]) motifs: List[Tuple[nx.DiGraph, dict]] = [(new_motif, {})] - for u, v in motif.edges: + # import ipdb;ipdb.set_trace() + for u, v, k in motif.edges: # OutMultiEdgeView([('a', 'b', 0)]) new_motifs = [] - min_hop = motif.edges[u, v]["__min_hop__"] - max_hop = motif.edges[u, v]["__max_hop__"] - edge_type = motif.edges[u, v]["__labels__"] + min_hop = motif.edges[u, v, k]["__min_hop__"] + max_hop = motif.edges[u, v, k]["__max_hop__"] + edge_type = motif.edges[u, v, k]["__labels__"] hops = [] if min_hop == 0: - new_motif = nx.DiGraph() + new_motif = nx.MultiDiGraph() new_motif.add_node(u, **motif.nodes[u]) new_motifs.append((new_motif, {(u, v): (u, u)})) elif min_hop >= 1: @@ -638,7 +676,7 @@ def _edge_hop_motifs(self, motif: nx.DiGraph) -> List[Tuple[nx.Graph, dict]]: hops.append(shortuuid()) for _ in range(max(min_hop, 1), max_hop): new_edges = [u] + hops + [v] - new_motif = nx.DiGraph() + new_motif = nx.MultiDiGraph() new_motif.add_edges_from( list(zip(new_edges[:-1], new_edges[1:])), __labels__=edge_type ) From 1f2d658bd606450ed70c06a0e87aa337b3f27401 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 8 May 2024 08:39:08 +0000 Subject: [PATCH 02/23] Refactors `_is_edge_attr_match` --- grandcypher/__init__.py | 77 ++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 142b926..69e5115 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -207,66 +207,57 @@ def _is_edge_attr_match( including the special '__labels__' set attribute. This function is adapted for MultiDiGraphs. - Arguments: - motif_edge_id (Tuple[str, str, Union[int, str]]): The motif edge ID - host_edge_id (Tuple[str, str, Union[int, str]]): The host edge ID - motif (Union[nx.Graph, nx.MultiDiGraph]): The motif graph - host (Union[nx.Graph, nx.MultiDiGraph]): The host graph + Args: + motif_edge_id: The motif edge ID. + host_edge_id: The host edge ID. + motif: The motif graph. + host: The host graph. Returns: - bool: True if the host edge matches the attributes in the motif + True if the host edge matches the attributes in the motif. """ motif_u, motif_v = motif_edge_id host_u, host_v = host_edge_id - # Handle the difference in edge access for Graph and MultiDiGraph - if isinstance(motif, nx.MultiDiGraph): - motif_edges = motif[motif_u][motif_v] - else: - motif_edges = {0: motif[motif_u][motif_v]} # Mock single edge - - if isinstance(host, nx.MultiDiGraph): - host_edges = host[host_u][host_v] - else: - host_edges = {0: host[host_u][host_v]} # Mock single edge - # AtlasView({0: {'__labels__': {'friend'}}, 1: {'__labels__': {'colleague'}}}) - # {'__labels__': {'A'}} // {'bar': '1'} + # Format edges for both DiGraph and MultiDiGraph + motif_edges = _get_edge_attributes(motif, motif_u, motif_v) + host_edges = _get_edge_attributes(host, host_u, host_v) # Aggregate all __labels__ into one set + motif_edges = _aggregate_edge_labels(motif_edges) + host_edges = _aggregate_edge_labels(host_edges) - motif_agg = { - "__labels__": set() - } - for edge_id, motif_attr in motif_edges.items(): - if "__labels__" in motif_attr and motif_attr['__labels__']: - motif_agg["__labels__"].update(motif_attr['__labels__']) - elif "__labels__" not in motif_attr: - motif_agg[edge_id] = motif_attr - - host_agg = { - "__labels__": set() - } - for edge_id, host_attr in host_edges.items(): - if "__labels__" in host_attr and host_attr['__labels__']: - host_agg["__labels__"].update(host_attr['__labels__']) - elif "__labels__" not in host_attr: - host_agg[edge_id] = host_attr - - - motif_edge = motif_agg - host_edge = host_agg - - for attr, val in motif_edge.items(): + for attr, val in motif_edges.items(): if attr == "__labels__": - if val and val - host_edge.get("__labels__", set()): + if val and val - host_edges.get("__labels__", set()): return False continue - if host_edge.get(attr) != val: + if host_edges.get(attr) != val: return False return True +def _get_edge_attributes(graph: Union[nx.Graph, nx.MultiDiGraph], u, v) -> Dict: + """ + Retrieve edge attributes from a graph, handling both Graph and MultiDiGraph. + """ + if isinstance(graph, nx.MultiDiGraph): + return graph[u][v] + return {0: graph[u][v]} # Mock single edge for DiGraph + +def _aggregate_edge_labels(edges: Dict) -> Dict: + """ + Aggregate '__labels__' attributes from edges into a single set. + """ + aggregated = {"__labels__": set()} + for edge_id, attrs in edges.items(): + if "__labels__" in attrs and attrs["__labels__"]: + aggregated["__labels__"].update(attrs["__labels__"]) + elif "__labels__" not in attrs: + aggregated[edge_id] = attrs + return aggregated + def _get_entity_from_host(host: nx.DiGraph, entity_name, entity_attribute=None): if entity_name in host.nodes(): # We are looking for a node mapping in the target graph: From aed457eebf8eda50aa7067d17a1714519e733a84 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 8 May 2024 14:31:22 +0000 Subject: [PATCH 03/23] Filters relations by __label__ during `_lookup` --- grandcypher/__init__.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 69e5115..acef2c9 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -410,9 +410,8 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: ret.append(path) else: - mapping_u, mapping_v = self._return_edges[data_path] + mapping_u, mapping_v = self._return_edges[data_path.split('.')[0]] # We are looking for an edge mapping in the target graph: - # import ipdb;ipdb.set_trace() is_hop = self._motif.edges[(mapping_u, mapping_v, 0)]["__is_hop__"] ret = ( _get_edge( @@ -423,9 +422,23 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: ret = (r[0] if is_hop else r for r in ret) # we keep the original list if len > 2 (edge hop 2+) + # Get all edge labels from the motif -- this is used to filter the relations for multigraphs + motif_edge_labels = set() + for edge in self._motif.get_edge_data(mapping_u, mapping_v).values(): + if edge.get('__labels__', None): + motif_edge_labels.update(edge['__labels__']) + if entity_attribute: # Get the correct entity from the target host graph, # and then return the attribute: + if isinstance(self._motif, nx.MultiDiGraph): + # unroll the relations in the multigraph + unnested_ret = [] + for r in ret: + unnested_ret.extend(r.values()) + + ret = [r for r in unnested_ret if (len(motif_edge_labels) == 0 or r['__labels__'].issubset(motif_edge_labels))] + ret = (r.get(entity_attribute, None) for r in ret) result[data_path] = list(ret)[offset_limit] @@ -651,7 +664,6 @@ def _edge_hop_motifs(self, motif: nx.MultiDiGraph) -> List[Tuple[nx.Graph, dict] if motif.out_degree(n) == 0 and motif.in_degree(n) == 0: new_motif.add_node(n, **motif.nodes[n]) motifs: List[Tuple[nx.DiGraph, dict]] = [(new_motif, {})] - # import ipdb;ipdb.set_trace() for u, v, k in motif.edges: # OutMultiEdgeView([('a', 'b', 0)]) new_motifs = [] min_hop = motif.edges[u, v, k]["__min_hop__"] From 849ad2f290a4fa67bf828465ed193c7c2844344e Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 09:38:52 +0000 Subject: [PATCH 04/23] Bundles relation attributes together for lookup --- grandcypher/__init__.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index acef2c9..f750f34 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -350,7 +350,7 @@ def __init__(self, target_graph: nx.Graph, limit=None): self._target_graph = target_graph self._paths = [] self._where_condition: CONDITION = None - self._motif = nx.MultiDiGraph() #nx.MultiDiGraph() # nx.DiGraph() + self._motif = nx.MultiDiGraph() # nx.DiGraph() self._matches = None self._matche_paths = None self._return_requests = [] @@ -435,14 +435,26 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: # unroll the relations in the multigraph unnested_ret = [] for r in ret: - unnested_ret.extend(r.values()) + + if motif_edge_labels == set(): + unnested_ret.append(r) + elif any([i.get('__labels__', None).issubset(motif_edge_labels) for i in r.values()]): + unnested_ret.append(r) - ret = [r for r in unnested_ret if (len(motif_edge_labels) == 0 or r['__labels__'].issubset(motif_edge_labels))] + ret = unnested_ret + + n_ret = [] + for r in ret: + new_ret = {} + for i, v in r.items(): + new_ret[i] = v.get(entity_attribute, None) + n_ret.append(new_ret) - ret = (r.get(entity_attribute, None) for r in ret) + ret = n_ret result[data_path] = list(ret)[offset_limit] + return result def return_clause(self, clause): @@ -698,7 +710,7 @@ def _product_motifs( new_motifs = [] for motif_1, mapping_1 in motifs_1: for motif_2, mapping_2 in motifs_2: - motif = nx.DiGraph() + motif = nx.MultiDiGraph() motif.add_nodes_from(motif_1.nodes.data()) motif.add_nodes_from(motif_2.nodes.data()) motif.add_edges_from(motif_1.edges.data()) From 282328121e61c9c5cc463d1799a1ba4cad42fc04 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 09:46:19 +0000 Subject: [PATCH 05/23] Refactors and adds inline docs --- grandcypher/__init__.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index f750f34..5f9428e 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -431,26 +431,25 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: if entity_attribute: # Get the correct entity from the target host graph, # and then return the attribute: - if isinstance(self._motif, nx.MultiDiGraph): - # unroll the relations in the multigraph - unnested_ret = [] + if isinstance(self._motif, nx.MultiDiGraph) and len(motif_edge_labels) > 0: + # filter the retrieved edge(s) based on the motif edge labels + filtered_ret = [] for r in ret: - if motif_edge_labels == set(): - unnested_ret.append(r) - elif any([i.get('__labels__', None).issubset(motif_edge_labels) for i in r.values()]): - unnested_ret.append(r) - - ret = unnested_ret + if any([i.get('__labels__', None).issubset(motif_edge_labels) for i in r.values()]): + filtered_ret.append(r) - n_ret = [] + ret = filtered_ret + + # get the attribute from the retrieved edge(s) + ret_with_attr = [] for r in ret: - new_ret = {} + r_attr = {} for i, v in r.items(): - new_ret[i] = v.get(entity_attribute, None) - n_ret.append(new_ret) + r_attr[i] = v.get(entity_attribute, None) + ret_with_attr.append(r_attr) - ret = n_ret + ret = ret_with_attr result[data_path] = list(ret)[offset_limit] @@ -710,7 +709,7 @@ def _product_motifs( new_motifs = [] for motif_1, mapping_1 in motifs_1: for motif_2, mapping_2 in motifs_2: - motif = nx.MultiDiGraph() + motif = nx.DiGraph() motif.add_nodes_from(motif_1.nodes.data()) motif.add_nodes_from(motif_2.nodes.data()) motif.add_edges_from(motif_1.edges.data()) From ee801b328a779d0b6b9b82b911c08728d90b5335 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 10:00:09 +0000 Subject: [PATCH 06/23] Adds tests for multigraph support --- grandcypher/test_queries.py | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 0cbf2e3..18ee30f 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -825,6 +825,106 @@ def test_order_by_with_non_returned_field(self): assert res["n.name"] == ["Carol", "Alice", "Bob"] +class TestMultigraphRelations: + def test_query_with_multiple_relations(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Charlie", age=25) + host.add_node("d", name="Diana", age=25) + + # Adding edges with labels for different types of relationship_type + host.add_edge("a", "b", __labels__={"friends"}) + host.add_edge("a", "b", __labels__={"colleagues"}) + host.add_edge("a", "c", __labels__={"colleagues"}) + host.add_edge("b", "d", __labels__={"family"}) + host.add_edge("c", "d", __labels__={"family"}) + host.add_edge("c", "d", __labels__={"friends"}) + host.add_edge("d", "a", __labels__={"friends"}) + host.add_edge("d", "a", __labels__={"colleagues"}) + + qry = """ + MATCH (n)-[r:friends]->(m) + RETURN n.name, m.name + """ + res = GrandCypher(host).run(qry) + assert res["n.name"] == ['Alice', 'Charlie', 'Diana'] + assert res["m.name"] == ['Bob', 'Diana', 'Alice'] + + def test_multiple_edges_specific_attribute(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=30) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"colleague"}, years=3) + host.add_edge("a", "b", __labels__={"friend"}, years=5) + host.add_edge("a", "b", __labels__={"enemy"}, hatred=10) + + qry = """ + MATCH (a)-[r:friend]->(b) + RETURN a.name, b.name, r.years + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice"] + assert res["b.name"] == ["Bob"] + assert res["r.years"] == [{0: 3, 1: 5, 2: None}] # should return None when attr is missing + + def test_edge_directionality(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"friend"}, years=1) + host.add_edge("b", "a", __labels__={"colleague"}, years=2) + host.add_edge("b", "a", __labels__={"mentor"}, years=4) + + qry = """ + MATCH (a)-[r]->(b) + RETURN a.name, b.name, r.__labels__, r.years + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice", "Bob"] + assert res["b.name"] == ["Bob", "Alice"] + assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}, 1: {'mentor'}}] + assert res["r.years"] == [{0: 1}, {0: 2, 1: 4}] + + + def test_query_with_missing_edge_attribute(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=30) + host.add_node("b", name="Bob", age=40) + host.add_node("c", name="Charlie", age=50) + host.add_edge("a", "b", __labels__={"friend"}, years=3) + host.add_edge("a", "c", __labels__={"colleague"}, years=10) + host.add_edge("b", "c", __labels__={"colleague"}, duration=10) + host.add_edge("b", "c", __labels__={"mentor"}, years=2) + + qry = """ + MATCH (a)-[r:colleague]->(b) + RETURN a.name, b.name, r.duration + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice", "Bob"] + assert res["b.name"] == ["Charlie", "Charlie"] + assert res["r.duration"] == [{0: None}, {0: 10, 1: None}] # should return None when attr is missing + + qry = """ + MATCH (a)-[r:colleague]->(b) + RETURN a.name, b.name, r.years + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice", "Bob"] + assert res["b.name"] == ["Charlie", "Charlie"] + assert res["r.years"] == [{0: 10}, {0: None, 1: 2}] + + qry = """ + MATCH (a)-[r]->(b) + RETURN a.name, b.name, r.__labels__, r.duration + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ['Alice', 'Alice', 'Bob'] + assert res["b.name"] == ['Bob', 'Charlie', 'Charlie'] + assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}}, {0: {'colleague'}, 1: {'mentor'}}] + assert res["r.duration"] == [{0: None}, {0: None}, {0: 10, 1: None}] + class TestVariableLengthRelationship: def test_single_variable_length_relationship(self): host = nx.DiGraph() From cb2a4e985509d5b4bc70c568e0152c9851aaa503 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 10:12:31 +0000 Subject: [PATCH 07/23] Cleans up inline docs --- grandcypher/__init__.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 5f9428e..1092810 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -204,17 +204,18 @@ def _is_edge_attr_match( ) -> bool: """ Check if an edge in the host graph matches the attributes in the motif, - including the special '__labels__' set attribute. This function is adapted - for MultiDiGraphs. + including the special '__labels__' set attribute. + This function formats edges into + nx.MultiDiGraph format i.e {0: first_relation, 1: ...}. - Args: - motif_edge_id: The motif edge ID. - host_edge_id: The host edge ID. - motif: The motif graph. - host: The host graph. + Arguments: + motif_edge_id (str): The motif edge ID + host_edge_id (str): The host edge ID + motif (nx.Graph): The motif graph + host (nx.Graph): The host graph Returns: - True if the host edge matches the attributes in the motif. + bool: True if the host edge matches the attributes in the motif """ motif_u, motif_v = motif_edge_id host_u, host_v = host_edge_id @@ -350,7 +351,7 @@ def __init__(self, target_graph: nx.Graph, limit=None): self._target_graph = target_graph self._paths = [] self._where_condition: CONDITION = None - self._motif = nx.MultiDiGraph() # nx.DiGraph() + self._motif = nx.MultiDiGraph() self._matches = None self._matche_paths = None self._return_requests = [] From 3595706a8259313be5b88e12acc580253a4830a2 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 11:54:57 +0000 Subject: [PATCH 08/23] Removes slicing list twice to avoid two copies in memory --- grandcypher/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 1092810..6eb5ff5 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -693,7 +693,7 @@ def _edge_hop_motifs(self, motif: nx.MultiDiGraph) -> List[Tuple[nx.Graph, dict] new_edges = [u] + hops + [v] new_motif = nx.MultiDiGraph() new_motif.add_edges_from( - list(zip(new_edges[:-1], new_edges[1:])), __labels__=edge_type + zip(new_edges, new_edges[1:]), __labels__=edge_type ) new_motif.add_node(u, **motif.nodes[u]) new_motif.add_node(v, **motif.nodes[v]) From da81cfd2ffc5fe457218bcfa04eb36ca24e3b8c6 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 14:56:05 +0000 Subject: [PATCH 09/23] Supports WHERE clause for relationships in multigraphs --- grandcypher/__init__.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 6eb5ff5..d127d3a 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -259,7 +259,7 @@ def _aggregate_edge_labels(edges: Dict) -> Dict: aggregated[edge_id] = attrs return aggregated -def _get_entity_from_host(host: nx.DiGraph, entity_name, entity_attribute=None): +def _get_entity_from_host(host: Union[nx.DiGraph, nx.MultiDiGraph], entity_name, entity_attribute=None): if entity_name in host.nodes(): # We are looking for a node mapping in the target graph: if entity_attribute: @@ -276,7 +276,10 @@ def _get_entity_from_host(host: nx.DiGraph, entity_name, entity_attribute=None): return None # print(f"Nothing found for {entity_name} {entity_attribute}") if entity_attribute: # looking for edge attribute: - return edge_data.get(entity_attribute, None) + if isinstance(host, nx.MultiDiGraph): + return [r.get(entity_attribute, None) for r in edge_data.values()] + else: + return edge_data.get(entity_attribute, None) else: return host.get_edge_data(*entity_name) @@ -307,7 +310,7 @@ def inner(match: dict, host: nx.DiGraph, return_endges: list) -> bool: def cond_(should_be, entity_id, operator, value) -> CONDITION: - def inner(match: dict, host: nx.DiGraph, return_endges: list) -> bool: + def inner(match: dict, host: Union[nx.DiGraph, nx.MultiDiGraph], return_endges: list) -> bool: host_entity_id = entity_id.split(".") if host_entity_id[0] in match: host_entity_id[0] = match[host_entity_id[0]] @@ -318,7 +321,13 @@ def inner(match: dict, host: nx.DiGraph, return_endges: list) -> bool: else: raise IndexError(f"Entity {host_entity_id} not in graph.") try: - val = operator(_get_entity_from_host(host, *host_entity_id), value) + if isinstance(host, nx.MultiDiGraph): + # if any of the relations between nodes satisfies condition, return True + r_vals = _get_entity_from_host(host, *host_entity_id) + val = any(operator(r_val, value) for r_val in r_vals) + else: + val = operator(_get_entity_from_host(host, *host_entity_id), value) + except: val = False if val != should_be: From 577d84315543ab573e9c30d668912d0fce97a42b Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Thu, 9 May 2024 15:01:43 +0000 Subject: [PATCH 10/23] Adds test for multigraph with WHERE clause on single edge --- grandcypher/test_queries.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 18ee30f..9dee15c 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -925,6 +925,29 @@ def test_query_with_missing_edge_attribute(self): assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}}, {0: {'colleague'}, 1: {'mentor'}}] assert res["r.duration"] == [{0: None}, {0: None}, {0: 10, 1: None}] + def test_multigraph_single_edge_where(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Christine", age=30) + host.add_edge("a", "b", __labels__={"friend"}, years=1, friendly="very") + host.add_edge("b", "a", __labels__={"colleague"}, years=2) + host.add_edge("b", "a", __labels__={"mentor"}, years=4) + host.add_edge("b", "c", __labels__={"chef"}, years=12) + + qry = """ + MATCH (a)-[r]->(b) + WHERE r.friendly == "very" OR r.years == 2 + RETURN a.name, b.name, r.__labels__, r.years, r.friendly + """ + res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice", "Bob"] + assert res["b.name"] == ["Bob", "Alice"] + assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}, 1: {'mentor'}}] + assert res["r.years"] == [{0: 1}, {0: 2, 1: 4}] + assert res["r.friendly"] == [{0: 'very'}, {0: None, 1: None}] + + class TestVariableLengthRelationship: def test_single_variable_length_relationship(self): host = nx.DiGraph() From e7595635fa8d50a036ea7f14f3e8a1762d84bf0b Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Tue, 21 May 2024 16:37:26 +0000 Subject: [PATCH 11/23] Accounts for WHERE with string node attributes in MultiDiGraphs --- grandcypher/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index d127d3a..45bb50e 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -324,6 +324,7 @@ def inner(match: dict, host: Union[nx.DiGraph, nx.MultiDiGraph], return_endges: if isinstance(host, nx.MultiDiGraph): # if any of the relations between nodes satisfies condition, return True r_vals = _get_entity_from_host(host, *host_entity_id) + r_vals = [r_vals] if not isinstance(r_vals, list) else r_vals val = any(operator(r_val, value) for r_val in r_vals) else: val = operator(_get_entity_from_host(host, *host_entity_id), value) From b76b82528df25b5c26e51f64d76e8303b95cefc5 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Tue, 21 May 2024 16:37:51 +0000 Subject: [PATCH 12/23] Unifies all unit tests to work with both DiGraphs and MultiDiGraphs --- grandcypher/test_queries.py | 427 +++++++++++++++++++++++------------- 1 file changed, 273 insertions(+), 154 deletions(-) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 9dee15c..3a28c55 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -3,30 +3,33 @@ from . import _GrandCypherGrammar, _GrandCypherTransformer, GrandCypher +ACCEPTED_GRAPH_TYPES = [nx.MultiDiGraph, nx.DiGraph] class TestWorking: - def test_simple_structural_match(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_structural_match(self, graph_type): tree = _GrandCypherGrammar.parse( """ MATCH (A)-[B]->(C) RETURN A """ ) - host = nx.DiGraph() + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") gct = _GrandCypherTransformer(host) gct.transform(tree) assert len(gct._get_true_matches()) == 2 - def test_simple_structural_match_returns_nodes(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_structural_match_returns_nodes(self, graph_type): tree = _GrandCypherGrammar.parse( """ MATCH (A)-[B]->(C) RETURN A """ ) - host = nx.DiGraph() + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") gct = _GrandCypherTransformer(host) @@ -35,14 +38,15 @@ def test_simple_structural_match_returns_nodes(self): assert "A" in returns assert len(returns["A"]) == 2 - def test_simple_structural_match_returns_node_attributes(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_structural_match_returns_node_attributes(self, graph_type): tree = _GrandCypherGrammar.parse( """ MATCH (A)-[B]->(C) RETURN A.dinnertime """ ) - host = nx.DiGraph() + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_node("x", dinnertime="no thanks I already ate") @@ -55,8 +59,9 @@ def test_simple_structural_match_returns_node_attributes(self): class TestSimpleAPI: - def test_simple_api(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_node("x", dinnertime="no thanks I already ate") @@ -68,8 +73,9 @@ def test_simple_api(self): assert len(GrandCypher(host).run(qry)["A.dinnertime"]) == 2 - def test_simple_api_triangles(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_triangles(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -83,8 +89,9 @@ def test_simple_api_triangles(self): assert len(GrandCypher(host).run(qry)["A"]) == 3 - def test_simple_api_single_node_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_single_node_where(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -98,8 +105,9 @@ def test_simple_api_single_node_where(self): assert len(GrandCypher(host).run(qry)["A"]) == 1 - def test_simple_api_single_node_multi_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_single_node_multi_where(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -114,8 +122,9 @@ def test_simple_api_single_node_multi_where(self): assert len(GrandCypher(host).run(qry)["A"]) == 1 - def test_simple_api_single_node_multi_where_2(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_single_node_multi_where_2(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -132,8 +141,9 @@ def test_simple_api_single_node_multi_where_2(self): assert len(GrandCypher(host).run(qry)["A"]) == 2 - def test_null_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_null_where(self, graph_type): + host = graph_type() host.add_node("x", foo="foo") host.add_node("y") host.add_node("z") @@ -145,8 +155,9 @@ def test_null_where(self): """ assert len(GrandCypher(host).run(qry)["A.foo"]) == 2 - def test_simple_api_multi_node_multi_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_multi_node_multi_where(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -163,8 +174,9 @@ def test_simple_api_multi_node_multi_where(self): assert len(GrandCypher(host).run(qry)["A"]) == 1 - def test_simple_api_anonymous_edge(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_anonymous_edge(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("z", "x") @@ -183,8 +195,9 @@ def test_simple_api_anonymous_edge(self): assert len(GrandCypher(host).run(qry)["A"]) == 3 - def test_simple_api_anonymous_node(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_anonymous_node(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z") host.add_edge("x", "z") @@ -206,8 +219,9 @@ def test_simple_api_anonymous_node(self): assert list(res.values())[0] == ["x", "x", "y"] print(res) - def test_single_edge_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_single_edge_where(self, graph_type): + host = graph_type() host.add_edge("y", "z") qry = """ @@ -224,8 +238,9 @@ def test_single_edge_where(self): assert len(GrandCypher(host).run(qry)["AB"]) == 1 - def test_simple_api_single_edge_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_single_edge_where(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z", foo="bar") host.add_edge("z", "x") @@ -238,8 +253,9 @@ def test_simple_api_single_edge_where(self): assert len(GrandCypher(host).run(qry)["A"]) == 1 - def test_simple_api_two_edge_where_clauses_same_edge(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_two_edge_where_clauses_same_edge(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z", foo="bar", weight=12) host.add_edge("z", "x") @@ -253,8 +269,9 @@ def test_simple_api_two_edge_where_clauses_same_edge(self): assert len(GrandCypher(host).run(qry)["AB"]) == 1 - def test_simple_api_two_edge_where_clauses_diff_edge(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_simple_api_two_edge_where_clauses_diff_edge(self, graph_type): + host = graph_type() host.add_edge("x", "y") host.add_edge("y", "z", foo="bar") host.add_edge("z", "x", weight=12) @@ -282,12 +299,13 @@ def test_simple_multi_edge(self): class TestDictAttributes: - def test_node_dict(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_node_dict(self, graph_type): qry = """ MATCH (A {type: "foo"})-[]->(B) RETURN A """ - host = nx.DiGraph() + host = graph_type() host.add_node("Y", type="foo") host.add_node("X", type="bar") host.add_edge("X", "Y") @@ -296,8 +314,9 @@ def test_node_dict(self): assert len(GrandCypher(host).run(qry)["A"]) == 1 - def test_null_value(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_null_value(self, graph_type): + host = graph_type() host.add_node("x", foo="foo") host.add_node("y") host.add_node("z") @@ -354,7 +373,8 @@ def test_skip_and_limit(self): ][10:20] ) - def test_single_node_query(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_single_node_query(self, graph_type): """ Test that you can search for individual nodes with properties """ @@ -365,12 +385,13 @@ def test_single_node_query(self): RETURN c """ - host = nx.DiGraph() + host = graph_type() host.add_node("London", type="City", name="London") assert len(GrandCypher(host).run(qry)["c"]) == 1 - def test_multi_node_query(self): + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_multi_node_query(self, graph_type): """ Test that you can search for individual nodes with properties """ @@ -382,15 +403,16 @@ def test_multi_node_query(self): RETURN b, c """ - host = nx.DiGraph() + host = graph_type() host.add_node("London", type="City", name="London") host.add_node("NYC", type="City", name="NYC") host.add_edge("London", "NYC") assert len(GrandCypher(host).run(qry)["c"]) == 1 - def test_left_or_right_direction_with_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_left_or_right_direction_with_where(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -413,8 +435,9 @@ def test_left_or_right_direction_with_where(self): assert len(res) == 1 assert list(res.values())[0] == ["x"] - def test_disconected_multi_match(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_disconected_multi_match(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -429,8 +452,9 @@ def test_disconected_multi_match(self): assert res["C.name"] == ["x", "y", "x", "y"] assert res["D.name"] == ["y", "z", "y", "z"] - def test_chained_edges(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_chained_edges(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -471,8 +495,9 @@ def test_chained_edges(self): assert res["B.name"] == ["y"] assert res["C.name"] == ["z"] - def test_chained_backward_edges(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_chained_backward_edges(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -529,8 +554,9 @@ def test_chained_backward_edges(self): assert res["B.name"] == ["y"] assert res["C.name"] == ["z"] - def test_undirected(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_undirected(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -556,8 +582,9 @@ def test_undirected(self): assert res["A.name"] == ["x"] assert res["B.name"] == ["y"] - def test_anonymous_node(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_anonymous_node(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") @@ -570,8 +597,9 @@ def test_anonymous_node(self): assert len(res) == 1 assert res["B.name"] == ["y", "y"] - def test_complex_where(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_complex_where(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -590,8 +618,9 @@ def test_complex_where(self): class TestDistinct: - def test_basic_distinct1(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_basic_distinct1(self, graph_type): + host = graph_type() host.add_node("a", name="Alice") host.add_node("b", name="Bob") host.add_node("c", name="Alice") # duplicate name @@ -604,8 +633,9 @@ def test_basic_distinct1(self): assert len(res["n.name"]) == 2 # should return "Alice" and "Bob" only once assert "Alice" in res["n.name"] and "Bob" in res["n.name"] - def test_basic_distinct2(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_basic_distinct2(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=25) @@ -622,8 +652,9 @@ def test_basic_distinct2(self): assert "Alice" in res["n.name"] and "Bob" in res["n.name"] and "Carol" in res["n.name"] and "Greg" in res["n.name"] - def test_distinct_with_relationships(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_distinct_with_relationships(self, graph_type): + host = graph_type() host.add_node("a", name="Alice") host.add_node("b", name="Bob") host.add_node("c", name="Alice") # duplicate name @@ -639,8 +670,9 @@ def test_distinct_with_relationships(self): assert res["n.name"] == ["Alice"] - def test_distinct_with_limit_and_skip(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_distinct_with_limit_and_skip(self, graph_type): + host = graph_type() for i in range(5): host.add_node(f"a{i}", name="Alice") host.add_node(f"b{i}", name="Bob") @@ -654,8 +686,9 @@ def test_distinct_with_limit_and_skip(self): assert res["n.name"] == ["Bob"] # assuming alphabetical order - def test_distinct_on_complex_graph(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_distinct_on_complex_graph(self, graph_type): + host = graph_type() host.add_node("a", name="Alice") host.add_node("b", name="Bob") host.add_node("c", name="Carol") @@ -673,8 +706,9 @@ def test_distinct_on_complex_graph(self): assert "Alice" in res["n.name"] and "Bob" in res["n.name"] and "Carol" in res["n.name"] assert len(res["m.name"]) == 3 # should account for paths without considering duplicate names - def test_distinct_with_attributes(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_distinct_with_attributes(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Alice", age=30) # same name, different attribute host.add_node("c", name="Bob", age=25) @@ -690,8 +724,9 @@ def test_distinct_with_attributes(self): class TestOrderBy: - def test_order_by_single_field_ascending(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_single_field_ascending(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -704,8 +739,9 @@ def test_order_by_single_field_ascending(self): res = GrandCypher(host).run(qry) assert res["n.name"] == ["Carol", "Alice", "Bob"] - def test_order_by_single_field_descending(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_single_field_descending(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -718,8 +754,9 @@ def test_order_by_single_field_descending(self): res = GrandCypher(host).run(qry) assert res["n.name"] == ["Bob", "Alice", "Carol"] - def test_order_by_single_field_no_direction_provided(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_single_field_no_direction_provided(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -732,8 +769,9 @@ def test_order_by_single_field_no_direction_provided(self): res = GrandCypher(host).run(qry) assert res["n.name"] == ["Carol", "Alice", "Bob"] - def test_order_by_multiple_fields(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_multiple_fields(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=25) @@ -748,8 +786,9 @@ def test_order_by_multiple_fields(self): # names sorted in descending order where ages are the same assert res["n.name"] == ["Dave", "Carol", "Alice", "Bob"] - def test_order_by_with_limit(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_with_limit(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -762,8 +801,9 @@ def test_order_by_with_limit(self): res = GrandCypher(host).run(qry) assert res["n.name"] == ["Carol", "Alice"] - def test_order_by_with_skip(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_with_skip(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -776,8 +816,9 @@ def test_order_by_with_skip(self): res = GrandCypher(host).run(qry) assert res["n.name"] == ["Alice", "Bob"] - def test_order_by_with_distinct(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_with_distinct(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=25) @@ -794,8 +835,9 @@ def test_order_by_with_distinct(self): assert res["n.name"] == ['Greg', 'Bob', 'Alice', 'Carol'] assert res["n.age"] == [32, 30, 25, 25] - def test_error_on_order_by_with_distinct_and_non_returned_field(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_error_on_order_by_with_distinct_and_non_returned_field(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=25) @@ -811,8 +853,9 @@ def test_error_on_order_by_with_distinct_and_non_returned_field(self): with pytest.raises(Exception): res = GrandCypher(host).run(qry) - def test_order_by_with_non_returned_field(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_order_by_with_non_returned_field(self, graph_type): + host = graph_type() host.add_node("a", name="Alice", age=25) host.add_node("b", name="Bob", age=30) host.add_node("c", name="Carol", age=20) @@ -947,10 +990,28 @@ def test_multigraph_single_edge_where(self): assert res["r.years"] == [{0: 1}, {0: 2, 1: 4}] assert res["r.friendly"] == [{0: 'very'}, {0: None, 1: None}] + def test_multigraph_where_node_attribute(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Christine", age=30) + host.add_edge("a", "b", __labels__={"friend"}, years=1, friendly="very") + host.add_edge("b", "a", __labels__={"colleague"}, years=2) + host.add_edge("b", "a", __labels__={"mentor"}, years=4) + host.add_edge("b", "c", __labels__={"chef"}, years=12) + + qry = """ + MATCH (a)-[r]->(b) + WHERE a.name == "Alice" OR b.name == "Bob" + RETURN a.name, b.name, r.__labels__, r.years, r.friendly + """ + res = GrandCypher(host).run(qry) + class TestVariableLengthRelationship: - def test_single_variable_length_relationship(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_single_variable_length_relationship(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -978,7 +1039,12 @@ def test_single_variable_length_relationship(self): assert len(res) == 3 assert res["A"] == ["x", "y", "z"] assert res["B"] == ["y", "z", "x"] - assert res["r"] == [[{"bar": "1"}], [{"bar": "2"}], [{"bar": "3"}]] + assert graph_type in ACCEPTED_GRAPH_TYPES + if graph_type is nx.DiGraph: + assert res["r"] == [[{"bar": "1"}], [{"bar": "2"}], [{"bar": "3"}]] + elif graph_type is nx.MultiDiGraph: + # MultiDiGraphs return a list of dictionaries to accommodate multiple edges between nodes + assert res["r"] == [[{0: {'bar': '1'}}], [{0: {'bar': '2'}}], [{0: {'bar': '3'}}]] qry = """ MATCH (A)-[r*2]->(B) @@ -989,14 +1055,23 @@ def test_single_variable_length_relationship(self): assert len(res) == 3 assert res["A"] == ["x", "y", "z"] assert res["B"] == ["z", "x", "y"] - assert res["r"] == [ - [{"bar": "1"}, {"bar": "2"}], - [{"bar": "2"}, {"bar": "3"}], - [{"bar": "3"}, {"bar": "1"}], - ] - - def test_complex_variable_length_relationship(self): - host = nx.DiGraph() + assert graph_type in ACCEPTED_GRAPH_TYPES + if graph_type is nx.DiGraph: + assert res["r"] == [ + [{"bar": "1"}, {"bar": "2"}], + [{"bar": "2"}, {"bar": "3"}], + [{"bar": "3"}, {"bar": "1"}], + ] + elif graph_type is nx.MultiGraph: + assert res["r"] == [ + [{0: {'bar': '1'}}, {1: {'bar': '2'}}], + [{0: {'bar': '2'}}, {1: {'bar': '3'}}], + [{0: {'bar': '3'}}, {1: {'bar': '1'}}], + ] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_complex_variable_length_relationship(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -1013,22 +1088,35 @@ def test_complex_variable_length_relationship(self): assert len(res) == 3 assert res["A"] == ["x", "y", "z", "x", "y", "z", "x", "y", "z"] assert res["B"] == ["x", "y", "z", "y", "z", "x", "z", "x", "y"] - assert res["r"] == [ - [None], - [None], - [None], - [{"bar": "1"}], - [{"bar": "2"}], - [{"bar": "3"}], - [{"bar": "1"}, {"bar": "2"}], - [{"bar": "2"}, {"bar": "3"}], - [{"bar": "3"}, {"bar": "1"}], - ] + assert graph_type in ACCEPTED_GRAPH_TYPES + if graph_type is nx.DiGraph: + assert res["r"] == [ + [None], + [None], + [None], + [{"bar": "1"}], + [{"bar": "2"}], + [{"bar": "3"}], + [{"bar": "1"}, {"bar": "2"}], + [{"bar": "2"}, {"bar": "3"}], + [{"bar": "3"}, {"bar": "1"}], + ] + elif graph_type is nx.MultiDiGraph: + assert res["r"] == [ + [None], [None], [None], + [{0: {'bar': '1'}}], + [{0: {'bar': '2'}}], + [{0: {'bar': '3'}}], + [{0: {'bar': '1'}}, {0: {'bar': '2'}}], + [{0: {'bar': '2'}}, {0: {'bar': '3'}}], + [{0: {'bar': '3'}}, {0: {'bar': '1'}}] + ] class TestType: - def test_host_no_edge_type(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_host_no_edge_type(self, graph_type): + host = graph_type() host.add_node("x") host.add_node("y") host.add_node("z") @@ -1046,8 +1134,9 @@ def test_host_no_edge_type(self): assert res["A"] == [] assert res["B"] == [] - def test_edge_type(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_edge_type(self, graph_type): + host = graph_type() host.add_node("x") host.add_node("y") host.add_node("z") @@ -1086,8 +1175,9 @@ def test_edge_type(self): assert res["A"] == ["y"] assert res["B"] == ["z"] - def test_edge_type_hop(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_edge_type_hop(self, graph_type): + host = graph_type() host.add_node("x") host.add_node("y") host.add_node("z") @@ -1125,20 +1215,35 @@ def test_edge_type_hop(self): assert len(res) == 3 assert res["A"] == ["x", "y", "z", "x", "y", "z", "x", "y", "z"] assert res["B"] == ["x", "y", "z", "y", "z", "x", "z", "x", "y"] - assert res["r"] == [ - [None], - [None], - [None], - [{"__labels__": {"Edge", "XY"}}], - [{"__labels__": {"Edge", "YZ"}}], - [{"__labels__": {"Edge", "ZX"}}], - [{"__labels__": {"Edge", "XY"}}, {"__labels__": {"Edge", "YZ"}}], - [{"__labels__": {"Edge", "YZ"}}, {"__labels__": {"Edge", "ZX"}}], - [{"__labels__": {"Edge", "ZX"}}, {"__labels__": {"Edge", "XY"}}], - ] - - def test_host_no_node_type(self): - host = nx.DiGraph() + assert graph_type in ACCEPTED_GRAPH_TYPES + if graph_type is nx.DiGraph: + assert res["r"] == [ + [None], + [None], + [None], + [{"__labels__": {"Edge", "XY"}}], + [{"__labels__": {"Edge", "YZ"}}], + [{"__labels__": {"Edge", "ZX"}}], + [{"__labels__": {"Edge", "XY"}}, {"__labels__": {"Edge", "YZ"}}], + [{"__labels__": {"Edge", "YZ"}}, {"__labels__": {"Edge", "ZX"}}], + [{"__labels__": {"Edge", "ZX"}}, {"__labels__": {"Edge", "XY"}}], + ] + elif graph_type is nx.MultiDiGraph: + assert res["r"] == [ + [None], + [None], + [None], + [{0: {'__labels__': {'Edge', 'XY'}}}], + [{0: {'__labels__': {'Edge', 'YZ'}}}], + [{0: {'__labels__': {'Edge', 'ZX'}}}], + [{0: {'__labels__': {'Edge', 'XY'}}}, {0: {'__labels__': {'Edge', 'YZ'}}}], + [{0: {'__labels__': {'Edge', 'YZ'}}}, {0: {'__labels__': {'Edge', 'ZX'}}}], + [{0: {'__labels__': {'Edge', 'ZX'}}}, {0: {'__labels__': {'Edge', 'XY'}}}] + ] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_host_no_node_type(self, graph_type): + host = graph_type() host.add_node("x") host.add_node("y") host.add_node("z") @@ -1156,8 +1261,9 @@ def test_host_no_node_type(self): assert res["A"] == [] assert res["B"] == [] - def test_node_type(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_node_type(self, graph_type): + host = graph_type() host.add_node("x", __labels__=set(["Node", "X"]), foo="1") host.add_node("y", __labels__=set(["Node", "Y"]), foo="2") host.add_node("z", __labels__=set(["Node", "Z"]), foo="3") @@ -1196,8 +1302,9 @@ def test_node_type(self): assert res["A"] == ["y"] assert res["B"] == ["z"] - def test_node_type_edge_hop(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_node_type_edge_hop(self, graph_type): + host = graph_type() host.add_node("x", __labels__=set(["Node", "X"]), foo="1") host.add_node("y", __labels__=set(["Node", "Y"]), foo="2") host.add_node("z", __labels__=set(["Node", "Z"]), foo="3") @@ -1237,8 +1344,9 @@ def test_node_type_edge_hop(self): class TestSpecialCases: - def test_two_edge_hop_with_edge_node_type(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_two_edge_hop_with_edge_node_type(self, graph_type): + host = graph_type() host.add_node("C_1_1", __labels__=set(["X"]), head=True) host.add_node("C_1_2", __labels__=set(["X"])) host.add_node("C_1_3", __labels__=set(["X"])) @@ -1281,8 +1389,9 @@ def test_two_edge_hop_with_edge_node_type(self): class TestComments: - def test_line_comments(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_line_comments(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -1316,8 +1425,9 @@ def test_line_comments(self): res = GrandCypher(host).run(qry) - def test_end_of_line_comments(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_end_of_line_comments(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -1348,8 +1458,9 @@ def test_end_of_line_comments(self): res = GrandCypher(host).run(qry) - def test_every_line_comments(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_every_line_comments(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -1367,8 +1478,9 @@ def test_every_line_comments(self): res = GrandCypher(host).run(qry) assert len(res) == 3 - def test_mid_query_comment(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_mid_query_comment(self, graph_type): + host = graph_type() host.add_node("x", foo=12) host.add_node("y", foo=13) host.add_node("z", foo=16) @@ -1393,8 +1505,9 @@ def test_mid_query_comment(self): class TestStringOperators: - def test_starts_with(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_starts_with(self, graph_type): + host = graph_type() host.add_node(1, name="Ford Prefect") host.add_node(2, name="Arthur Dent") host.add_edge(1, 2) @@ -1408,8 +1521,9 @@ def test_starts_with(self): res = GrandCypher(host).run(qry) assert len(res) == 1 - def test_ends_with(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_ends_with(self, graph_type): + host = graph_type() host.add_node(1, name="Ford Prefect") host.add_node(2, name="Arthur Dent") host.add_edge(1, 2) @@ -1432,8 +1546,9 @@ def test_ends_with(self): res = GrandCypher(host).run(qry) assert len(res["A"]) == 2 - def test_contains(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_contains(self, graph_type): + host = graph_type() host.add_node(1, name="Ford Prefect") host.add_node(2, name="Arthur Dent") host.add_edge(1, 2) @@ -1458,8 +1573,9 @@ def test_contains(self): class TestNot: - def test_not(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_not(self, graph_type): + host = graph_type() host.add_node(1, name="Ford Prefect") host.add_node(2, name="Arthur Dent") host.add_edge(1, 2) @@ -1473,8 +1589,9 @@ def test_not(self): res = GrandCypher(host).run(qry) assert len(res["A"]) == 1 - def test_doublenot(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_doublenot(self, graph_type): + host = graph_type() host.add_node(1, name="Ford Prefect") host.add_node(2, name="Arthur Dent") host.add_edge(1, 2) @@ -1488,8 +1605,9 @@ def test_doublenot(self): res = GrandCypher(host).run(qry) assert len(res["A"]) == 1 - def test_nested_nots_in_statements(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_nested_nots_in_statements(self, graph_type): + host = graph_type() host.add_node("Piano", votes=42, percussion="yup", strings="yup") host.add_node("Guitar", votes=16, percussion="nah", strings="yup") host.add_node("Drum", votes=12, percussion="yup", strings="nah") @@ -1520,8 +1638,9 @@ def test_nested_nots_in_statements(self): class TestPath: - def test_path(self): - host = nx.DiGraph() + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_path(self, graph_type): + host = graph_type() host.add_node("x", name="x") host.add_node("y", name="y") host.add_node("z", name="z") From 6748db7f1bbe7079739ee4ca5bf541b5670e23cc Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 22 May 2024 09:40:46 +0000 Subject: [PATCH 13/23] Completes multidigraph test for WHERE on node attribute --- grandcypher/test_queries.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 3a28c55..450b28b 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -1002,10 +1002,15 @@ def test_multigraph_where_node_attribute(self): qry = """ MATCH (a)-[r]->(b) - WHERE a.name == "Alice" OR b.name == "Bob" + WHERE a.name == "Alice" RETURN a.name, b.name, r.__labels__, r.years, r.friendly """ res = GrandCypher(host).run(qry) + assert res["a.name"] == ["Alice"] + assert res["b.name"] == ["Bob"] + assert res["r.__labels__"] == [{0: {'friend'}}] + assert res["r.years"] == [{0: 1}] + assert res["r.friendly"] == [{0: 'very'}] class TestVariableLengthRelationship: From 022a4381a099f2c374bcc9d4db74983962a4291c Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 22 May 2024 14:18:44 +0000 Subject: [PATCH 14/23] Supports logical OR for relationship matching --- grandcypher/__init__.py | 44 ++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index abd2824..4c3259b 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -16,7 +16,7 @@ import grandiso -from lark import Lark, Transformer, v_args, Token +from lark import Lark, Transformer, v_args, Token, Tree _OPERATORS = { @@ -107,8 +107,8 @@ edge_match : LEFT_ANGLE? "--" RIGHT_ANGLE? | LEFT_ANGLE? "-[]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME "]-" RIGHT_ANGLE? - | LEFT_ANGLE? "-[" CNAME ":" TYPE "]-" RIGHT_ANGLE? - | LEFT_ANGLE? "-[" ":" TYPE "]-" RIGHT_ANGLE? + | LEFT_ANGLE? "-[" CNAME ":" type_list "]-" RIGHT_ANGLE? + | LEFT_ANGLE? "-[" ":" type_list "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" "*" MIN_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" "*" MIN_HOP ".." MAX_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME "*" MIN_HOP "]-" RIGHT_ANGLE? @@ -118,6 +118,7 @@ | LEFT_ANGLE? "-[" CNAME ":" TYPE "*" MIN_HOP "]-" RIGHT_ANGLE? | LEFT_ANGLE? "-[" CNAME ":" TYPE "*" MIN_HOP ".." MAX_HOP "]-" RIGHT_ANGLE? +type_list : TYPE ( "|" TYPE )* LEFT_ANGLE : "<" RIGHT_ANGLE : ">" @@ -228,10 +229,14 @@ def _is_edge_attr_match( motif_edges = _aggregate_edge_labels(motif_edges) host_edges = _aggregate_edge_labels(host_edges) + motif_types = motif_edges.get('__labels__', set()) + host_types = host_edges.get('__labels__', set()) + + if motif_types and not motif_types.intersection(host_types): + return False + for attr, val in motif_edges.items(): if attr == "__labels__": - if val and val - host_edges.get("__labels__", set()): - return False continue if host_edges.get(attr) != val: return False @@ -775,10 +780,21 @@ def entity_id(self, entity_id): return ".".join(entity_id) return entity_id.value - def edge_match(self, edge_name): - direction = cname = min_hop = max_hop = edge_type = None + def edge_match(self, edge_tokens): + def flatten_tokens(edge_tokens): + flat_tokens = [] + for token in edge_tokens: + if isinstance(token, Tree): + flat_tokens.extend(flatten_tokens(token.children)) # Recursively flatten the tree + else: + flat_tokens.append(token) + return flat_tokens + + direction = cname = min_hop = max_hop = None + edge_types = [] + edge_tokens = flatten_tokens(edge_tokens) - for token in edge_name: + for token in edge_tokens: if token.type == "MIN_HOP": min_hop = int(token.value) elif token.type == "MAX_HOP": @@ -790,15 +806,19 @@ def edge_match(self, edge_name): elif token.type == "RIGHT_ANGLE": direction = "r" elif token.type == "TYPE": - edge_type = token.value + edge_types.append(token.value) else: cname = token direction = direction if direction is not None else "b" if (min_hop is not None or max_hop is not None) and (direction == "b"): - raise TypeError("not support edge hopping for bidirectional edge") + raise TypeError("Bidirectional edge does not support edge hopping") + + # Handle the case where no edge types are specified, defaulting to a generic type if needed + if edge_types == []: + edge_types = None - return (cname, edge_type, direction, min_hop, max_hop) + return (cname, edge_types, direction, min_hop, max_hop) def node_match(self, node_name): cname = node_type = json_data = None @@ -845,7 +865,7 @@ def match_clause(self, match_clause: Tuple): if maxh > self._max_hop: raise ValueError(f"max hop is caped at 100, found {maxh}!") if t: - t = set([t]) + t = set([t] if type(t) is str else t) self._motif.add_edges_from( edges, __min_hop__=minh, __max_hop__=maxh, __is_hop__=ish, __labels__=t ) From 05f98b3937aa86c5f21745045c6b789c7bf80bcc Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Wed, 22 May 2024 16:17:03 +0000 Subject: [PATCH 15/23] Adds tests for logical OR in MATCH for relationships --- grandcypher/test_queries.py | 91 +++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 450b28b..9fc0bfb 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -1661,3 +1661,94 @@ def test_path(self, graph_type): res = GrandCypher(host).run(qry) assert len(res["P"][0]) == 5 + + +class TestMatchWithOrOperatorInRelationships: + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_single_or_operator(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|WORKS_WITH]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice", "Bob"] + assert res["n2.name"] == ["Bob", "Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_multiple_or_operators(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_node("d", name="Derek") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("a", "c", __labels__={"KNOWS"}) + host.add_edge("b", "c", __labels__={"LIVES_NEAR"}) + host.add_edge("b", "d", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|KNOWS|LIVES_NEAR]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice", "Alice", "Bob"] + assert res["n2.name"] == ["Bob", "Carol", "Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_match_with_or_operator_and_other_conditions(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice", age=30) + host.add_node("b", name="Bob", age=25) + host.add_node("c", name="Carol", age=40) + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("a", "c", __labels__={"KNOWS"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:LOVES|KNOWS]->(n2) + WHERE n1.age > 28 AND n2.age > 35 + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Alice"] + assert res["n2.name"] == ["Carol"] + + @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) + def test_no_results_when_no_matching_edges(self, graph_type): + host = graph_type() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_edge("a", "b", __labels__={"WORKS_WITH"}) + + qry = """ + MATCH (n1)-[r:IN_CITY|HAS_ROUTE]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert len(res["n1.name"]) == 0 # No results because no edges match + + def test_multigraph_match_with_single_or_operator(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice") + host.add_node("b", name="Bob") + host.add_node("c", name="Carol") + host.add_node("d", name="Derek") + host.add_edge("a", "b", __labels__={"LOVES"}) + host.add_edge("b", "c", __labels__={"WORKS_WITH"}) + host.add_edge("b", "c", __labels__={"DISLIKES"}) + host.add_edge("b", "d", __labels__={"DISLIKES"}) + + qry = """ + MATCH (n1)-[r:IS_SUING|DISLIKES]->(n2) + RETURN n1.name, n2.name + """ + res = GrandCypher(host).run(qry) + assert res["n1.name"] == ["Bob", "Bob"] + assert res["n2.name"] == ["Carol", "Derek"] From 351eb6e5732aa932952aa95d96582887717ee56b Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Fri, 7 Jun 2024 18:13:47 +0000 Subject: [PATCH 16/23] Implements aggregation functions --- grandcypher/__init__.py | 113 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 107 insertions(+), 6 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 4c3259b..94d2652 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -81,7 +81,13 @@ -return_clause : "return"i distinct_return? entity_id ("," entity_id)* + +return_clause : "return"i distinct_return? return_item ("," return_item)* +return_item : entity_id | aggregation_function | entity_id "." attribute_id + +aggregation_function : AGGREGATE_FUNC "(" entity_id ( "." attribute_id )? ")" +AGGREGATE_FUNC : "COUNT" | "SUM" | "AVG" | "MAX" | "MIN" +attribute_id : CNAME distinct_return : "DISTINCT"i limit_clause : "limit"i NUMBER @@ -282,6 +288,16 @@ def _get_entity_from_host( edge_data = host.get_edge_data(*entity_name) if not edge_data: return None # print(f"Nothing found for {entity_name} {entity_attribute}") + + # import ipdb;ipdb.set_trace() + # result = {} + # if entity_attribute: + # for rel_type, attrs in edge_data.items(): + # result[str(rel_type) + '.' + entity_attribute] = attrs.get(entity_attribute, None) + # else: + # for rel_type, attrs in edge_data.items(): + # result[str(rel_type)] = attrs + # return result if entity_attribute: # looking for edge attribute: if isinstance(host, nx.MultiDiGraph): @@ -376,6 +392,7 @@ def __init__(self, target_graph: nx.Graph, limit=None): self._matche_paths = None self._return_requests = [] self._return_edges = {} + self._aggregate_functions = [] self._distinct = False self._order_by = None self._order_by_attributes = set() @@ -483,9 +500,13 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: for r in ret: r_attr = {} for i, v in r.items(): - r_attr[i] = v.get(entity_attribute, None) + # r_attr[list(v.get('__labels__'))[0]] = v.get(entity_attribute, None) + # [{'paid': 90}, {'paid': 650, 'friend': None}] + r_attr[(i, list(v.get('__labels__'))[0])] = v.get(entity_attribute, None) + # [{0: 70, 1: 90}, {0: 400, 1: None, 2: 650}] + # [{(0, 'paid'): 70, (1, 'paid'): 90}, {(0, 'paid'): 400, (1, 'friend'): None, (2, 'paid'): 650}] ret_with_attr.append(r_attr) - + ret = ret_with_attr result[data_path] = list(ret)[offset_limit] @@ -497,9 +518,19 @@ def return_clause(self, clause): # collect all entity identifiers to be returned for item in clause: if item: - if not isinstance(item, str): - item = str(item.value) - self._return_requests.append(item) + item = item.children[0] if isinstance(item, Tree) else item + if isinstance(item, Tree) and item.data == "aggregation_function": + func = str(item.children[0].value) # AGGREGATE_FUNC + entity = str(item.children[1].value) + if len(item.children) > 2: + entity += "." + str(item.children[2].children[0].value) + self._aggregate_functions.append((func, entity)) + self._return_requests.append(entity) + else: + if not isinstance(item, str): + item = str(item.value) + self._return_requests.append(item) + def order_clause(self, order_clause): self._order_by = [] @@ -525,12 +556,82 @@ def skip_clause(self, skip): skip = int(skip[-1]) self._skip = skip + def extract_data(self, entity_name, results): + attribute = None + if '.' in entity_name: + requested_attribute = entity_name + entity_name, attribute = entity_name.split('.') + + if attribute: + # Flatten values from a multidigraph and collect all relevant attributes + attr_keys = {k: [] for item in results[requested_attribute] for k in item.keys()} + import ipdb;ipdb.set_trace() + attributes = [ + item.get(attribute, {}).values() for item in results[requested_attribute] + ] + import ipdb;ipdb.set_trace() + data = [ + value for sublist in [ + self._target_graph.nodes[node].get(attribute, {}).values() for node in self._target_graph.nodes() + ] for value in sublist + ] + else: + # return the list of nodes/edges + data = list(results.get(entity_name, [])) + return data + + def aggregate(self, func, results, entity, group_keys): + # Collect data based on group keys + grouped_data = {} + for i in range(len(results[entity])): + group_tuple = tuple(results[key][i] for key in group_keys if key in results) + if group_tuple not in grouped_data: + grouped_data[group_tuple] = [] + grouped_data[group_tuple].append(results[entity][i]) + + # Apply aggregation function + aggregate_results = {} + for group, data in grouped_data.items(): + # data => [{(0, 'paid'): 70, (1, 'paid'): 90}] + unqiue_labels = set([k[1] for rel in data for k in rel.keys()]) + collated_data = { + label: [(v or 0) for rel in data for k, v in rel.items() if k[1] == label] for label in unqiue_labels + } + if func == "COUNT": + count_data = {label: len(data) for label, data in collated_data.items()} + aggregate_results[group] = count_data + elif func == "SUM": + sum_data = {label: sum(data) for label, data in collated_data.items()} + aggregate_results[group] = sum_data + elif func == "AVG": + sum_data = {label: sum(data) for label, data in collated_data.items()} + count_data = {label: len(data) for label, data in collated_data.items()} + avg_data = {label: sum_data[label] / count_data[label] if count_data[label] > 0 else 0 for label in sum_data} + aggregate_results[group] = avg_data + elif func == "MAX": + max_data = {label: max(data) for label, data in collated_data.items()} + aggregate_results[group] = max_data + elif func == "MIN": + min_data = {label: min(data) for label, data in collated_data.items()} + aggregate_results[group] = min_data + + return aggregate_results + def returns(self, ignore_limit=False): results = self._lookup( self._return_requests + list(self._order_by_attributes), offset_limit=slice(0, None), ) + if len(self._aggregate_functions) > 0: + group_keys = [key for key in results.keys() if not any(key.endswith(func[1]) for func in self._aggregate_functions)] + + aggregated_results = {} + for func, entity in self._aggregate_functions: + aggregated_data = self.aggregate(func, results, entity, group_keys) + aggregated_results[f"{func}({entity})"] = aggregated_data + import ipdb; ipdb.set_trace() + results.update(aggregated_results) if self._order_by: results = self._apply_order_by(results) if self._distinct: From 72db2a8049abc4c2321933a38308c057ab704e64 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Fri, 7 Jun 2024 18:20:13 +0000 Subject: [PATCH 17/23] Removes unused code --- grandcypher/__init__.py | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 94d2652..4afa48b 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -289,15 +289,6 @@ def _get_entity_from_host( if not edge_data: return None # print(f"Nothing found for {entity_name} {entity_attribute}") - # import ipdb;ipdb.set_trace() - # result = {} - # if entity_attribute: - # for rel_type, attrs in edge_data.items(): - # result[str(rel_type) + '.' + entity_attribute] = attrs.get(entity_attribute, None) - # else: - # for rel_type, attrs in edge_data.items(): - # result[str(rel_type)] = attrs - # return result if entity_attribute: # looking for edge attribute: if isinstance(host, nx.MultiDiGraph): @@ -556,29 +547,6 @@ def skip_clause(self, skip): skip = int(skip[-1]) self._skip = skip - def extract_data(self, entity_name, results): - attribute = None - if '.' in entity_name: - requested_attribute = entity_name - entity_name, attribute = entity_name.split('.') - - if attribute: - # Flatten values from a multidigraph and collect all relevant attributes - attr_keys = {k: [] for item in results[requested_attribute] for k in item.keys()} - import ipdb;ipdb.set_trace() - attributes = [ - item.get(attribute, {}).values() for item in results[requested_attribute] - ] - import ipdb;ipdb.set_trace() - data = [ - value for sublist in [ - self._target_graph.nodes[node].get(attribute, {}).values() for node in self._target_graph.nodes() - ] for value in sublist - ] - else: - # return the list of nodes/edges - data = list(results.get(entity_name, [])) - return data def aggregate(self, func, results, entity, group_keys): # Collect data based on group keys @@ -615,6 +583,7 @@ def aggregate(self, func, results, entity, group_keys): min_data = {label: min(data) for label, data in collated_data.items()} aggregate_results[group] = min_data + aggregate_results = [v for v in aggregate_results.values()] return aggregate_results def returns(self, ignore_limit=False): @@ -630,7 +599,6 @@ def returns(self, ignore_limit=False): for func, entity in self._aggregate_functions: aggregated_data = self.aggregate(func, results, entity, group_keys) aggregated_results[f"{func}({entity})"] = aggregated_data - import ipdb; ipdb.set_trace() results.update(aggregated_results) if self._order_by: results = self._apply_order_by(results) From aa007b9fae37fa7a10360960d3172ac78345f628 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 06:38:40 +0000 Subject: [PATCH 18/23] Adds agg function results to `_return_requests` --- grandcypher/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 4afa48b..8e4142c 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -598,7 +598,9 @@ def returns(self, ignore_limit=False): aggregated_results = {} for func, entity in self._aggregate_functions: aggregated_data = self.aggregate(func, results, entity, group_keys) - aggregated_results[f"{func}({entity})"] = aggregated_data + func_key = f"{func}({entity})" + aggregated_results[func_key] = aggregated_data + self._return_requests.append(func_key) results.update(aggregated_results) if self._order_by: results = self._apply_order_by(results) From 963fa8f769829824787ef1b713857f9e4e23d597 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 07:43:52 +0000 Subject: [PATCH 19/23] Handles `None` values appropriately for MIN and MAX --- grandcypher/__init__.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 8e4142c..8b96ea1 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -86,8 +86,8 @@ return_item : entity_id | aggregation_function | entity_id "." attribute_id aggregation_function : AGGREGATE_FUNC "(" entity_id ( "." attribute_id )? ")" -AGGREGATE_FUNC : "COUNT" | "SUM" | "AVG" | "MAX" | "MIN" -attribute_id : CNAME +AGGREGATE_FUNC : "COUNT" | "SUM" | "AVG" | "MAX" | "MIN" +attribute_id : CNAME distinct_return : "DISTINCT"i limit_clause : "limit"i NUMBER @@ -557,14 +557,26 @@ def aggregate(self, func, results, entity, group_keys): grouped_data[group_tuple] = [] grouped_data[group_tuple].append(results[entity][i]) + def _collate_data(data, unique_labels, func): + # for ["COUNT", "SUM", "AVG"], we treat None as 0 + if func in ["COUNT", "SUM", "AVG"]: + collated_data = { + label: [(v or 0) for rel in data for k, v in rel.items() if k[1] == label] for label in unique_labels + } + # for ["MAX", "MIN"], we treat None as non-existent + elif func in ["MAX", "MIN"]: + collated_data = { + label: [v for rel in data for k, v in rel.items() if (k[1] == label and v is not None)] for label in unique_labels + } + + return collated_data + # Apply aggregation function aggregate_results = {} for group, data in grouped_data.items(): # data => [{(0, 'paid'): 70, (1, 'paid'): 90}] - unqiue_labels = set([k[1] for rel in data for k in rel.keys()]) - collated_data = { - label: [(v or 0) for rel in data for k, v in rel.items() if k[1] == label] for label in unqiue_labels - } + unique_labels = set([k[1] for rel in data for k in rel.keys()]) + collated_data = _collate_data(data, unique_labels, func) if func == "COUNT": count_data = {label: len(data) for label, data in collated_data.items()} aggregate_results[group] = count_data From 346a044678aaa6541cae1a5da92fe8e35728edc0 Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 08:00:04 +0000 Subject: [PATCH 20/23] Adds tests for agg functions and adjusts existing tests to new output --- grandcypher/test_queries.py | 138 ++++++++++++++++++++++++++++++++---- 1 file changed, 123 insertions(+), 15 deletions(-) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 9fc0bfb..3ab7bd0 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -909,8 +909,8 @@ def test_multiple_edges_specific_attribute(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice"] assert res["b.name"] == ["Bob"] - assert res["r.years"] == [{0: 3, 1: 5, 2: None}] # should return None when attr is missing - + assert res["r.years"] == [{(0, 'colleague'): 3, (1, 'friend'): 5, (2, 'enemy'): None}] # should return None when attr is missing + def test_edge_directionality(self): host = nx.MultiDiGraph() host.add_node("a", name="Alice", age=25) @@ -926,9 +926,8 @@ def test_edge_directionality(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice", "Bob"] assert res["b.name"] == ["Bob", "Alice"] - assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}, 1: {'mentor'}}] - assert res["r.years"] == [{0: 1}, {0: 2, 1: 4}] - + assert res["r.__labels__"] == [{(0, 'friend'): {'friend'}}, {(0, 'colleague'): {'colleague'}, (1, 'mentor'): {'mentor'}}] + assert res["r.years"] == [{(0, 'friend'): 1}, {(0, 'colleague'): 2, (1, 'mentor'): 4}] def test_query_with_missing_edge_attribute(self): host = nx.MultiDiGraph() @@ -947,7 +946,7 @@ def test_query_with_missing_edge_attribute(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice", "Bob"] assert res["b.name"] == ["Charlie", "Charlie"] - assert res["r.duration"] == [{0: None}, {0: 10, 1: None}] # should return None when attr is missing + assert res["r.duration"] == [{(0, 'colleague'): None}, {(0, 'colleague'): 10, (1, 'mentor'): None}] qry = """ MATCH (a)-[r:colleague]->(b) @@ -956,7 +955,7 @@ def test_query_with_missing_edge_attribute(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice", "Bob"] assert res["b.name"] == ["Charlie", "Charlie"] - assert res["r.years"] == [{0: 10}, {0: None, 1: 2}] + assert res["r.years"] == [{(0, 'colleague'): 10}, {(0, 'colleague'): None, (1, 'mentor'): 2}] qry = """ MATCH (a)-[r]->(b) @@ -965,8 +964,8 @@ def test_query_with_missing_edge_attribute(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ['Alice', 'Alice', 'Bob'] assert res["b.name"] == ['Bob', 'Charlie', 'Charlie'] - assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}}, {0: {'colleague'}, 1: {'mentor'}}] - assert res["r.duration"] == [{0: None}, {0: None}, {0: 10, 1: None}] + assert res["r.__labels__"] == [{(0, 'friend'): {'friend'}}, {(0, 'colleague'): {'colleague'}}, {(0, 'colleague'): {'colleague'}, (1, 'mentor'): {'mentor'}}] + assert res["r.duration"] == [{(0, 'friend'): None}, {(0, 'colleague'): None}, {(0, 'colleague'): 10, (1, 'mentor'): None}] def test_multigraph_single_edge_where(self): host = nx.MultiDiGraph() @@ -986,9 +985,9 @@ def test_multigraph_single_edge_where(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice", "Bob"] assert res["b.name"] == ["Bob", "Alice"] - assert res["r.__labels__"] == [{0: {'friend'}}, {0: {'colleague'}, 1: {'mentor'}}] - assert res["r.years"] == [{0: 1}, {0: 2, 1: 4}] - assert res["r.friendly"] == [{0: 'very'}, {0: None, 1: None}] + assert res["r.__labels__"] == [{(0, 'friend'): {'friend'}}, {(0, 'colleague'): {'colleague'}, (1, 'mentor'): {'mentor'}}] + assert res["r.years"] == [{(0, 'friend'): 1}, {(0, 'colleague'): 2, (1, 'mentor'): 4}] + assert res["r.friendly"] == [{(0, 'friend'): 'very'}, {(0, 'colleague'): None, (1, 'mentor'): None}] def test_multigraph_where_node_attribute(self): host = nx.MultiDiGraph() @@ -1008,9 +1007,118 @@ def test_multigraph_where_node_attribute(self): res = GrandCypher(host).run(qry) assert res["a.name"] == ["Alice"] assert res["b.name"] == ["Bob"] - assert res["r.__labels__"] == [{0: {'friend'}}] - assert res["r.years"] == [{0: 1}] - assert res["r.friendly"] == [{0: 'very'}] + assert res["r.__labels__"] == [{(0, 'friend'): {'friend'}}] + assert res["r.years"] == [{(0, 'friend'): 1}] + assert res["r.friendly"] == [{(0, 'friend'): 'very'}] + + def test_multigraph_multiple_same_edge_labels(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"paid"}, amount=12, date="12th June") + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + host.add_edge("b", "a", __labels__={"paid"}, value=14) + host.add_edge("a", "b", __labels__={"friends"}, years=9) + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, r.amount + """ + res = GrandCypher(host).run(qry) + assert res["n.name"] == ["Alice", "Bob"] + assert res["m.name"] == ["Bob", "Alice"] + # the second "paid" edge between Bob -> Alice has no "amount" attribute, so it should be None + assert res["r.amount"] == [{(0, 'paid'): 12, (1, 'friends'): None, (2, 'paid'): 40}, {(0, 'paid'): 6, (1, 'paid'): None}] + + def test_multigraph_aggregation_function_sum(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"paid"}, amount=12, date="12th June") + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + host.add_edge("b", "a", __labels__={"paid"}, value=14) + host.add_edge("a", "b", __labels__={"friends"}, years=9) + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, SUM(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res['SUM(r.amount)'] == [{'friends': 0, 'paid': 52}, {'paid': 6}] + + def test_multigraph_aggregation_function_avg(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"paid"}, amount=12, date="12th June") + host.add_edge("b", "a", __labels__={"paid"}, amount=6, message="Thanks") + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, AVG(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["AVG(r.amount)"] == [{'paid': 26}, {'paid': 6}] + + def test_multigraph_aggregation_function_min(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + host.add_edge("a", "b", __labels__={"paid"}, value=4) + host.add_edge("a", "b", __labels__={"paid"}, amount=12) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, MIN(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["MIN(r.amount)"] == [{'paid': 12}, {'paid': 6}] + + def test_multigraph_aggregation_function_max(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Christine") + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + host.add_edge("a", "b", __labels__={"paid"}, amount=12) + host.add_edge("a", "c", __labels__={"owes"}, amount=39) + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, MAX(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["MAX(r.amount)"] == [{'paid': 40}, {'paid': 6}] + + qry = """ + MATCH (n)-[r:owes]->(m) + RETURN n.name, m.name, MAX(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["MAX(r.amount)"] == [{'owes': 39}] + + def test_multigraph_aggregation_function_count(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Christine") + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + host.add_edge("a", "b", __labels__={"paid"}, amount=12) + host.add_edge("a", "c", __labels__={"owes"}, amount=39) + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, COUNT(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["COUNT(r.amount)"] == [{'paid': 2}, {'paid': 1}] class TestVariableLengthRelationship: From 3d7ebaecafc34a4a75090bbf2d98617059c0b66a Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 08:14:12 +0000 Subject: [PATCH 21/23] Adds examples page --- README.md | 3 +++ examples.md | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 examples.md diff --git a/README.md b/README.md index 07a583f..c82ab92 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ RETURN A.club, B.club """) ``` +See [examples.md](examples.md) for more! + ### Example Usage with SQL Create your own "Sqlite for Neo4j"! This example uses [grand-graph](https://github.com/aplbrain/grand) to run queries in SQL: @@ -81,6 +83,7 @@ RETURN | Graph mutations (e.g. `DELETE`, `SET`,...) | 🛣 | | | `DISTINCT` | ✅ Thanks @jackboyla! | | | `ORDER BY` | ✅ Thanks @jackboyla! | | +| Aggregation functions (`COUNT`, `SUM`, `MIN`, `MAX`, `AVG`) | ✅ Thanks @jackboyla! | | | | | | | -------------- | -------------- | ---------------- | diff --git a/examples.md b/examples.md new file mode 100644 index 0000000..0122329 --- /dev/null +++ b/examples.md @@ -0,0 +1,66 @@ + +## Multigraph + +```python +from grandcypher import GrandCypher +import networkx as nx + +host = nx.MultiDiGraph() +host.add_node("a", name="Alice", age=25) +host.add_node("b", name="Bob", age=30) +host.add_edge("a", "b", __labels__={"paid"}, amount=12, date="12th June") +host.add_edge("b", "a", __labels__={"paid"}, amount=6) +host.add_edge("b", "a", __labels__={"paid"}, value=14) +host.add_edge("a", "b", __labels__={"friends"}, years=9) +host.add_edge("a", "b", __labels__={"paid"}, amount=40) + +qry = """ +MATCH (n)-[r:paid]->(m) +RETURN n.name, m.name, r.amount +""" +res = GrandCypher(host).run(qry) +print(res) + +''' +{ + 'n.name': ['Alice', 'Bob'], + 'm.name': ['Bob', 'Alice'], + 'r.amount': [{(0, 'paid'): 12, (1, 'friends'): None, (2, 'paid'): 40}, {(0, 'paid'): 6, (1, 'paid'): None}] +} +''' +``` + +## Aggregation Functions + +```python +from grandcypher import GrandCypher +import networkx as nx + +host = nx.MultiDiGraph() +host.add_node("a", name="Alice", age=25) +host.add_node("b", name="Bob", age=30) +host.add_edge("a", "b", __labels__={"paid"}, amount=12, date="12th June") +host.add_edge("b", "a", __labels__={"paid"}, amount=6) +host.add_edge("b", "a", __labels__={"paid"}, value=14) +host.add_edge("a", "b", __labels__={"friends"}, years=9) +host.add_edge("a", "b", __labels__={"paid"}, amount=40) + +qry = """ +MATCH (n)-[r:paid]->(m) +RETURN n.name, m.name, SUM(r.amount) +""" +res = GrandCypher(host).run(qry) +print(res) + +''' +{ + 'n.name': ['Alice', 'Bob'], + 'm.name': ['Bob', 'Alice'], + 'SUM(r.amount)': [{'paid': 52, 'friends': 0}, {'paid': 6}] +} +''' +``` + + + + From ddd4db4774192da37f11e7a18db9b5a349fab9bb Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 08:38:12 +0000 Subject: [PATCH 22/23] Adds test for multiple agg functions --- grandcypher/test_queries.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/grandcypher/test_queries.py b/grandcypher/test_queries.py index 3ab7bd0..afea528 100644 --- a/grandcypher/test_queries.py +++ b/grandcypher/test_queries.py @@ -1120,6 +1120,24 @@ def test_multigraph_aggregation_function_count(self): res = GrandCypher(host).run(qry) assert res["COUNT(r.amount)"] == [{'paid': 2}, {'paid': 1}] + def test_multigraph_multiple_aggregation_functions(self): + host = nx.MultiDiGraph() + host.add_node("a", name="Alice", age=25) + host.add_node("b", name="Bob", age=30) + host.add_node("c", name="Christine") + host.add_edge("a", "b", __labels__={"paid"}, amount=40) + host.add_edge("a", "b", __labels__={"paid"}, amount=12) + host.add_edge("a", "c", __labels__={"owes"}, amount=39) + host.add_edge("b", "a", __labels__={"paid"}, amount=6) + + qry = """ + MATCH (n)-[r:paid]->(m) + RETURN n.name, m.name, COUNT(r.amount), SUM(r.amount) + """ + res = GrandCypher(host).run(qry) + assert res["COUNT(r.amount)"] == [{'paid': 2}, {'paid': 1}] + assert res["SUM(r.amount)"] == [{'paid': 52}, {'paid': 6}] + class TestVariableLengthRelationship: @pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES) From d37f3d20a1aa88bea5da62faf07393d03b14b19e Mon Sep 17 00:00:00 2001 From: Jack Boylan Date: Mon, 10 Jun 2024 08:43:25 +0000 Subject: [PATCH 23/23] Removes commented code --- grandcypher/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/grandcypher/__init__.py b/grandcypher/__init__.py index 8b96ea1..e582af2 100644 --- a/grandcypher/__init__.py +++ b/grandcypher/__init__.py @@ -491,11 +491,8 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]: for r in ret: r_attr = {} for i, v in r.items(): - # r_attr[list(v.get('__labels__'))[0]] = v.get(entity_attribute, None) - # [{'paid': 90}, {'paid': 650, 'friend': None}] r_attr[(i, list(v.get('__labels__'))[0])] = v.get(entity_attribute, None) - # [{0: 70, 1: 90}, {0: 400, 1: None, 2: 650}] - # [{(0, 'paid'): 70, (1, 'paid'): 90}, {(0, 'paid'): 400, (1, 'friend'): None, (2, 'paid'): 650}] + # eg, [{(0, 'paid'): 70, (1, 'paid'): 90}, {(0, 'paid'): 400, (1, 'friend'): None, (2, 'paid'): 650}] ret_with_attr.append(r_attr) ret = ret_with_attr