diff --git a/grand/backends/_dataframe.py b/grand/backends/_dataframe.py index 219232f..4beed47 100644 --- a/grand/backends/_dataframe.py +++ b/grand/backends/_dataframe.py @@ -120,11 +120,13 @@ def all_nodes_as_iterable(self, include_metadata: bool = False): if self._node_df is not None: return [ ( - node_id, - row.to_dict(), + ( + node_id, + row.to_dict(), + ) + if include_metadata + else node_id ) - if include_metadata - else node_id for node_id, row in self._node_df.iterrows() ] @@ -287,14 +289,12 @@ def get_edge_by_id(self, u: Hashable, v: Hashable): """ if self._directed: - return ( - self._edge_df[ - (self._edge_df[self._edge_df_source_column] == u) - & (self._edge_df[self._edge_df_target_column] == v) - ] - .iloc[0] - .to_dict() - ) + result = self._edge_df[ + (self._edge_df[self._edge_df_source_column] == u) + & (self._edge_df[self._edge_df_target_column] == v) + ] + if len(result): + return self._edge_as_dict(result.iloc[0]) else: left = self._edge_df[ @@ -355,9 +355,11 @@ def get_node_neighbors(self, u: Hashable, include_metadata: bool = False): else: return iter( [ - row[self._edge_df_source_column] - if row[self._edge_df_source_column] != u - else row[self._edge_df_target_column] + ( + row[self._edge_df_source_column] + if row[self._edge_df_source_column] != u + else row[self._edge_df_target_column] + ) for _, row in self._edge_df[ (self._edge_df[self._edge_df_source_column] == u) | (self._edge_df[self._edge_df_target_column] == u) @@ -430,9 +432,11 @@ def get_node_predecessors(self, u: Hashable, include_metadata: bool = False): else: return iter( [ - row[self._edge_df_source_column] - if row[self._edge_df_target_column] != u - else row[self._edge_df_target_column] + ( + row[self._edge_df_source_column] + if row[self._edge_df_target_column] != u + else row[self._edge_df_target_column] + ) for _, row in self._edge_df[ (self._edge_df[self._edge_df_target_column] == u) | (self._edge_df[self._edge_df_source_column] == u) @@ -460,6 +464,19 @@ def get_node_count(self) -> int: ) ) + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return len(self._edge_df) + def ingest_from_edgelist_dataframe( self, edgelist: pd.DataFrame, source_column: str, target_column: str ) -> dict: diff --git a/grand/backends/_dynamodb.py b/grand/backends/_dynamodb.py index 39758d0..98e8eae 100644 --- a/grand/backends/_dynamodb.py +++ b/grand/backends/_dynamodb.py @@ -191,11 +191,13 @@ def all_nodes_as_iterable(self, include_metadata: bool = False) -> Collection: """ return [ ( - node[self._primary_key], - {k: v for k, v in node.items() if k not in [self._primary_key]}, + ( + node[self._primary_key], + {k: v for k, v in node.items() if k not in [self._primary_key]}, + ) + if include_metadata + else node[self._primary_key] ) - if include_metadata - else node[self._primary_key] for node in self._scan_table(self._node_table) ] @@ -264,9 +266,11 @@ def all_edges_as_iterable(self, include_metadata: bool = False) -> Collection: """ return [ - (edge[self._edge_source_key], edge[self._edge_target_key], edge) - if include_metadata - else (edge[self._edge_source_key], edge[self._edge_target_key]) + ( + (edge[self._edge_source_key], edge[self._edge_target_key], edge) + if include_metadata + else (edge[self._edge_source_key], edge[self._edge_target_key]) + ) for edge in self._scan_table(self._edge_table) ] @@ -435,6 +439,21 @@ def get_node_count(self) -> int: "ItemCount" ] + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return self._client.describe_table(TableName=self._edge_table_name)["Table"][ + "ItemCount" + ] + # Ingesting def ingest_from_edgelist_dataframe( diff --git a/grand/backends/_gremlin.py b/grand/backends/_gremlin.py index d408f4b..bb6f8a2 100644 --- a/grand/backends/_gremlin.py +++ b/grand/backends/_gremlin.py @@ -314,5 +314,18 @@ def get_node_count(self) -> int: """ return self._g.V().count().toList()[0] + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return self._g.E().count().toList()[0] + def teardown(self) -> None: self._g.V().drop().toList() diff --git a/grand/backends/_igraph.py b/grand/backends/_igraph.py index 0594d70..bd7e82d 100644 --- a/grand/backends/_igraph.py +++ b/grand/backends/_igraph.py @@ -258,3 +258,16 @@ def get_node_count(self) -> int: """ return self._ig.vcount() + + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return self._ig.ecount() diff --git a/grand/backends/_networkit.py b/grand/backends/_networkit.py index 4d40e7a..d600cf0 100644 --- a/grand/backends/_networkit.py +++ b/grand/backends/_networkit.py @@ -281,7 +281,7 @@ def get_node_predecessors( [self._names.get_name(i) for i in self._nk_graph.iterInNeighbors(my_id)] ) - def get_node_count(self) -> Iterable: + def get_node_count(self) -> int: """ Get an integer count of the number of nodes in this graph. @@ -292,4 +292,17 @@ def get_node_count(self) -> Iterable: int: The count of nodes """ - return len([i for i in self.all_nodes_as_iterable()]) + return self._nk_graph.numberOfNodes() + + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return self._nk_graph.numberOfEdges() diff --git a/grand/backends/_networkx.py b/grand/backends/_networkx.py index b5cc9bb..b012fef 100644 --- a/grand/backends/_networkx.py +++ b/grand/backends/_networkx.py @@ -168,6 +168,19 @@ def get_node_count(self) -> int: """ return len(self._nx_graph) + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return len(self._nx_graph.edges) + def ingest_from_edgelist_dataframe( self, edgelist: pd.DataFrame, source_column: str, target_column: str ) -> dict: @@ -200,6 +213,3 @@ def ingest_from_edgelist_dataframe( "edge_count": len(edgelist), "edge_duration": time.time() - tic, } - - def teardown(self) -> None: - return diff --git a/grand/backends/_sqlbackend.py b/grand/backends/_sqlbackend.py index 99996d4..8d565e3 100644 --- a/grand/backends/_sqlbackend.py +++ b/grand/backends/_sqlbackend.py @@ -74,11 +74,11 @@ def __init__( self._node_table.create(self._engine, checkfirst=True) source_column = sqlalchemy.Column( - self._edge_source_key, sqlalchemy.String(_DEFAULT_SQL_STR_LEN) + self._edge_source_key, sqlalchemy.String(_DEFAULT_SQL_STR_LEN) ) target_column = sqlalchemy.Column( - self._edge_target_key, sqlalchemy.String(_DEFAULT_SQL_STR_LEN) + self._edge_target_key, sqlalchemy.String(_DEFAULT_SQL_STR_LEN) ) # Create edges table @@ -92,7 +92,7 @@ def __init__( ), sqlalchemy.Column("_metadata", sqlalchemy.JSON), source_column, - target_column + target_column, ) self._edge_table.create(self._engine, checkfirst=True) @@ -156,10 +156,13 @@ def add_node(self, node_name: Hashable, metadata: dict) -> Hashable: return node_name def add_nodes_from(self, nodes_for_adding, **attr): - nodes = [{ - self._primary_key: node, - "_metadata": {**attr, **metadata}, - } for node, metadata in nodes_for_adding] + nodes = [ + { + self._primary_key: node, + "_metadata": {**attr, **metadata}, + } + for node, metadata in nodes_for_adding + ] self._connection.execute(self._node_table.insert(), nodes) @@ -204,7 +207,9 @@ def all_nodes_as_iterable(self, include_metadata: bool = False) -> Generator: if include_metadata: sql = self._node_table.select() else: - sql = self._node_table.select().with_only_columns(self._node_table.c[self._primary_key]) + sql = self._node_table.select().with_only_columns( + self._node_table.c[self._primary_key] + ) results = [] for x in self._connection.execute(sql): @@ -277,12 +282,15 @@ def add_edge(self, u: Hashable, v: Hashable, metadata: dict): return pk def add_edges_from(self, ebunch_to_add, **attr): - edges = [{ - self._primary_key: f"__{u}__{v}", - self._edge_source_key: u, - self._edge_target_key: v, - "_metadata": {**attr, **metadata}, - } for u, v, metadata in ebunch_to_add] + edges = [ + { + self._primary_key: f"__{u}__{v}", + self._edge_source_key: u, + self._edge_target_key: v, + "_metadata": {**attr, **metadata}, + } + for u, v, metadata in ebunch_to_add + ] self._connection.execute(self._edge_table.insert(), edges) @@ -299,7 +307,7 @@ def all_edges_as_iterable(self, include_metadata: bool = False) -> Generator: columns = [ self._node_table.c[self._edge_source_key], - self._node_table.c[self._edge_target_key] + self._node_table.c[self._edge_target_key], ] if include_metadata: @@ -345,28 +353,26 @@ def get_edge_by_id(self, u: Hashable, v: Hashable): """ if self._directed: pk = f"__{u}__{v}" - return ( - self._connection.execute( - self._edge_table.select().where( - self._edge_table.c[self._primary_key] == pk - ) + result = self._connection.execute( + self._edge_table.select().where( + self._edge_table.c[self._primary_key] == pk ) - .fetchone() - ._metadata - ) + ).fetchone() + if result: + return result._metadata + raise KeyError(f"Edge {u}-{v} not found.") else: - return ( - self._connection.execute( - self._edge_table.select().where( - or_( - (self._edge_table.c[self._primary_key] == f"__{u}__{v}"), - (self._edge_table.c[self._primary_key] == f"__{v}__{u}"), - ) + result = self._connection.execute( + self._edge_table.select().where( + or_( + (self._edge_table.c[self._primary_key] == f"__{u}__{v}"), + (self._edge_table.c[self._primary_key] == f"__{v}__{u}"), ) ) - .fetchone() - ._metadata - ) + ).fetchone() + if result: + return result._metadata + raise KeyError(f"Edge {u}-{v} not found.") def get_node_neighbors( self, u: Hashable, include_metadata: bool = False @@ -384,18 +390,20 @@ def get_node_neighbors( if self._directed: res = self._connection.execute( - self._edge_table.select().where( - self._edge_table.c[self._edge_source_key] == str(u) - ).order_by(self._edge_table.c[self._primary_key]) + self._edge_table.select() + .where(self._edge_table.c[self._edge_source_key] == str(u)) + .order_by(self._edge_table.c[self._primary_key]) ).fetchall() else: res = self._connection.execute( - self._edge_table.select().where( + self._edge_table.select() + .where( or_( (self._edge_table.c[self._edge_source_key] == str(u)), (self._edge_table.c[self._edge_target_key] == str(u)), ) - ).order_by(self._edge_table.c[self._primary_key]) + ) + .order_by(self._edge_table.c[self._primary_key]) ).fetchall() res = [x._asdict() for x in res] @@ -436,18 +444,20 @@ def get_node_predecessors( """ if self._directed: res = self._connection.execute( - self._edge_table.select().where( - self._edge_table.c[self._edge_target_key] == str(u) - ).order_by(self._edge_table.c[self._primary_key]) + self._edge_table.select() + .where(self._edge_table.c[self._edge_target_key] == str(u)) + .order_by(self._edge_table.c[self._primary_key]) ).fetchall() else: res = self._connection.execute( - self._edge_table.select().where( + self._edge_table.select() + .where( or_( (self._edge_table.c[self._edge_target_key] == str(u)), (self._edge_table.c[self._edge_source_key] == str(u)), ) - ).order_by(self._edge_table.c[self._primary_key]) + ) + .order_by(self._edge_table.c[self._primary_key]) ).fetchall() res = [x._asdict() for x in res] @@ -473,7 +483,7 @@ def get_node_predecessors( ] ) - def get_node_count(self) -> Iterable: + def get_node_count(self) -> int: """ Get an integer count of the number of nodes in this graph. @@ -488,6 +498,21 @@ def get_node_count(self) -> Iterable: select(func.count()).select_from(self._node_table) ).scalar() + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return self._connection.execute( + select(func.count()).select_from(self._edge_table) + ).scalar() + def out_degrees(self, nbunch=None): """ Return the in-degree of each node in the graph. @@ -503,7 +528,9 @@ def out_degrees(self, nbunch=None): if nbunch is None: where_clause = None elif isinstance(nbunch, (list, tuple)): - where_clause = self._edge_table.c[self._edge_source_key].in_([str(x) for x in nbunch]) + where_clause = self._edge_table.c[self._edge_source_key].in_( + [str(x) for x in nbunch] + ) else: # single node: where_clause = self._edge_table.c[self._edge_source_key] == str(nbunch) @@ -524,10 +551,7 @@ def out_degrees(self, nbunch=None): if where_clause is not None: query = query.where(where_clause) - results = { - r[0]: r[1] - for r in self._connection.execute(query) - } + results = {r[0]: r[1] for r in self._connection.execute(query)} if nbunch and not isinstance(nbunch, (list, tuple)): return results.get(nbunch, 0) @@ -548,7 +572,9 @@ def in_degrees(self, nbunch=None): if nbunch is None: where_clause = None elif isinstance(nbunch, (list, tuple)): - where_clause = self._edge_table.c[self._edge_target_key].in_([str(x) for x in nbunch]) + where_clause = self._edge_table.c[self._edge_target_key].in_( + [str(x) for x in nbunch] + ) else: # single node: where_clause = self._edge_table.c[self._edge_target_key] == str(nbunch) @@ -569,10 +595,7 @@ def in_degrees(self, nbunch=None): if where_clause is not None: query = query.where(where_clause) - results = { - r[0]: r[1] - for r in self._connection.execute(query) - } + results = {r[0]: r[1] for r in self._connection.execute(query)} if nbunch and not isinstance(nbunch, (list, tuple)): return results.get(nbunch, 0) diff --git a/grand/backends/backend.py b/grand/backends/backend.py index 83e0b24..53aaa0f 100644 --- a/grand/backends/backend.py +++ b/grand/backends/backend.py @@ -118,6 +118,22 @@ def has_node(self, u: Hashable) -> bool: except KeyError: return False + def has_edge(self, u: Hashable, v: Hashable) -> bool: + """ + Return true if the edge exists in the graph. + + Arguments: + u (Hashable): The source node ID + v (Hashable): The target node ID + + Returns: + bool: True if the edge exists + """ + try: + return self.get_edge_by_id(u, v) is not None + except KeyError: + return False + def add_edge(self, u: Hashable, v: Hashable, metadata: dict): """ Add a new edge to the graph between two nodes. @@ -222,6 +238,19 @@ def get_node_count(self) -> int: """ return len([i for i in self.all_nodes_as_iterable()]) + def get_edge_count(self) -> int: + """ + Get an integer count of the number of edges in this graph. + + Arguments: + None + + Returns: + int: The count of edges + + """ + return len([i for i in self.all_edges_as_iterable()]) + def degree(self, u: Hashable) -> int: """ Get the degree of a node. @@ -282,18 +311,15 @@ def out_degrees(self, nbunch=None) -> Collection: class CachedBackend(Backend): - """ A proxy Backend that serves as a cache for any other grand.Backend. """ - def __init__(self, backend: Backend): - ... + def __init__(self, backend: Backend): ... class InMemoryCachedBackend(CachedBackend): - """ A proxy Backend that serves as a cache for any other grand.Backend. diff --git a/grand/backends/test_backends.py b/grand/backends/test_backends.py index 18f4c52..d08fcac 100644 --- a/grand/backends/test_backends.py +++ b/grand/backends/test_backends.py @@ -43,6 +43,7 @@ os.environ.get("TEST_NETWORKXBACKEND", default="1") != "1", reason="NetworkX Backend skipped because $TEST_NETWORKXBACKEND != 1.", ), + id="NetworkXBackend", ), ] backend_test_params = [ @@ -52,6 +53,7 @@ os.environ.get("TEST_DATAFRAMEBACKEND", default="1") != "1", reason="DataFrameBackend skipped because $TEST_DATAFRAMEBACKEND != 1.", ), + id="DataFrameBackend", ), ] @@ -63,6 +65,7 @@ os.environ.get("TEST_DYNAMODB", default="1") != "1", reason="DynamoDB Backend skipped because $TEST_DYNAMODB != 0 or boto3 is not installed", ), + id="DynamoDBBackend", ), ) @@ -74,6 +77,7 @@ os.environ.get("TEST_SQLBACKEND", default="1") != "1", reason="SQL Backend skipped because $TEST_SQLBACKEND != 1 or sqlalchemy is not installed.", ), + id="SQLBackend", ), ) if _CAN_IMPORT_IGRAPH: @@ -84,6 +88,7 @@ os.environ.get("TEST_IGRAPHBACKEND", default="1") != "1", reason="IGraph Backend skipped because $TEST_IGRAPHBACKEND != 1 or igraph is not installed.", ), + id="IGraphBackend", ), ) if _CAN_IMPORT_NETWORKIT: @@ -94,6 +99,7 @@ os.environ.get("TEST_NETWORKIT", default="1") != "1", reason="Networkit Backend skipped because $TEST_NETWORKIT != 1 or networkit is not installed.", ), + id="NetworkitBackend", ), ) @@ -107,6 +113,7 @@ os.environ.get("TEST_NETWORKITBACKEND") != "1", reason="Networkit Backend skipped because $TEST_NETWORKITBACKEND != 1.", ), + id="NetworkitBackend", ), ) @@ -120,6 +127,7 @@ os.environ.get("TEST_IGRAPHBACKEND") != "1", reason="Networkit Backend skipped because $TEST_IGRAPHBACKEND != 1.", ), + id="IGraphBackend", ), ) @@ -370,6 +378,13 @@ def test_directed_degree_multiple(self, backend): assert G.nx.in_degree("bar") == 1 assert G.nx.in_degree("baz") == 1 + def test_node_count(self, backend): + backend, kwargs = backend + G = Graph(backend=backend(**kwargs)) + G.nx.add_node("foo", bar=True) + G.nx.add_node("bar", foo=True) + assert len(G.nx) == 2 + @pytest.mark.benchmark @pytest.mark.parametrize("backend", backend_test_params) diff --git a/grand/dialects/__init__.py b/grand/dialects/__init__.py index 29c012a..0efc138 100644 --- a/grand/dialects/__init__.py +++ b/grand/dialects/__init__.py @@ -180,6 +180,19 @@ def out_degree(self, nbunch=None): def is_directed(self): return self.parent.backend.is_directed() + def __len__(self): + return self.parent.backend.get_node_count() + + def number_of_nodes(self): + return self.parent.backend.get_node_count() + + def number_of_edges(self, u=None, v=None): + if u is None and v is None: + return self.parent.backend.get_edge_count() + # Get the number of edges between u and v. because we don't support + # multigraphs, this is 1 if there is an edge, 0 otherwise. + return 1 if self.parent.backend.has_edge(u, v) else 0 + class IGraphDialect(nx.Graph): """ @@ -218,7 +231,7 @@ def es(self): ] def add_edges(self, edgelist: List[Tuple[Hashable, Hashable]]): - for (u, v) in edgelist: + for u, v in edgelist: self.parent.backend.add_edge(u, v, {}) def get_edgelist(self): @@ -279,7 +292,7 @@ def degreeOut(self, v): def density(self): # TODO: implement backend#degree? - E = len(self.parent.backend.all_edges_as_iterable()) + E = self.parent.backend.get_edge_count() V = self.parent.backend.get_node_count() if self.parent.backend.is_directed(): @@ -291,7 +304,7 @@ def numberOfNodes(self) -> int: return self.parent.backend.get_node_count() def numberOfEdges(self) -> int: - return len(self.parent.backend.all_edges_as_iterable()) + return self.parent.backend.get_edge_count() def removeEdge(self, u, v) -> None: raise NotImplementedError