Skip to content

Commit

Permalink
Avoid spending gigabytes of memory on statistics for -as-needed dropp…
Browse files Browse the repository at this point in the history
…ing (#50)

* Downsample indices and areas during tiling if they get too big

* Cap the indices rather than downsampling them
  • Loading branch information
e-n-f authored Dec 20, 2022
1 parent 2b7bbbd commit 0799297
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 2.16.0

* During tiling, limit the size of the statistics that are kept for -as-needed calculations, because they can get quite large for sources with hundreds of millions of features.

## 2.15.2

* Change tile hash function to fnv1a
Expand Down
53 changes: 47 additions & 6 deletions tile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1829,6 +1829,27 @@ static bool line_is_too_small(drawvec const &geometry, int z, int detail) {
return true;
}

// Append `val` to `vals` while keeping `vals` a bounded sample of a stream,
// to avoid spending lots of memory on a complete list when there are
// hundreds of millions of features.
//
//   vals      - the sample collected so far; modified in place
//   val       - the candidate value belonging to sequence number `seq`
//   increment - the current sampling stride: `val` is only recorded when
//               `seq` is a multiple of it. Must be nonzero (it is used as a
//               divisor); it is doubled here each time the sample is thinned.
//   seq       - the sequence number of the candidate value
//
// Whenever recording a value pushes the sample past 100K entries, the entries
// at odd positions are discarded and the stride is doubled, so the sample
// never holds more than 100K entries when this function returns.
template <class T>
void add_sample_to(std::vector<T> &vals, T val, size_t &increment, size_t seq) {
	const size_t max_samples = 100000;

	if (seq % increment != 0) {
		// Not on the current stride: this value is not part of the sample.
		return;
	}

	vals.push_back(val);
	if (vals.size() <= max_samples) {
		return;
	}

	// Thin in place: slide the entries at even positions down to the front
	// and drop the tail, rather than building a temporary vector and copying
	// it back. Position 0 is already where it belongs, so start at 2.
	size_t kept = 1;
	for (size_t i = 2; i < vals.size(); i += 2) {
		vals[kept++] = vals[i];
	}
	vals.erase(vals.begin() + kept, vals.end());

	// From now on only every other previously eligible value is recorded.
	increment *= 2;
}

long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *metabase, char *stringpool, int z, const unsigned tx, const unsigned ty, const int detail, int min_detail, sqlite3 *outdb, const char *outdir, int buffer, const char *fname, FILE **geomfile, int minzoom, int maxzoom, double todo, std::atomic<long long> *along, long long alongminus, double gamma, int child_shards, long long *meta_off, long long *pool_off, unsigned *initial_x, unsigned *initial_y, std::atomic<int> *running, double simplification, std::vector<std::map<std::string, layermap_entry>> *layermaps, std::vector<std::vector<std::string>> *layer_unmaps, size_t tiling_seg, size_t pass, unsigned long long mingap, long long minextent, double fraction, const char *prefilter, const char *postfilter, struct json_object *filter, write_tile_args *arg, atomic_strategy *strategy) {
double merge_fraction = 1;
double mingap_fraction = 1;
Expand Down Expand Up @@ -1878,8 +1899,11 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta

std::vector<struct partial> partials;
std::map<std::string, std::vector<coalesce>> layers;

std::vector<unsigned long long> indices;
std::vector<long long> extents;
size_t extents_increment = 1;

double coalesced_area = 0;
drawvec shared_nodes;

Expand Down Expand Up @@ -1974,7 +1998,7 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
prefilter_jp = json_begin_file(prefilter_read_fp);
}

while (1) {
for (size_t seq = 0; ; seq++) {
serial_feature sf;
ssize_t which_partial = -1;

Expand Down Expand Up @@ -2018,8 +2042,18 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
}
}

// Cap the indices, rather than sampling them like extents (areas).
// choose_mingap cares about the distance between *surviving* features,
// not between *original* features, so storing gaps instead of indices
// would not let us downsample them fairly.
// Hopefully the first 100K features in the tile are reasonably
// representative of the other features in the tile.
const size_t MAX_INDICES = 100000;

if (additional[A_CLUSTER_DENSEST_AS_NEEDED] || cluster_distance != 0) {
indices.push_back(sf.index);
if (indices.size() < MAX_INDICES) {
indices.push_back(sf.index);
}
if ((sf.index < merge_previndex || sf.index - merge_previndex < mingap) && find_partial(partials, sf, which_partial, layer_unmaps, LLONG_MAX)) {
partials[which_partial].clustered++;

Expand All @@ -2040,14 +2074,18 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
continue;
}
} else if (additional[A_DROP_DENSEST_AS_NEEDED]) {
indices.push_back(sf.index);
if (indices.size() < MAX_INDICES) {
indices.push_back(sf.index);
}
if (sf.index - merge_previndex < mingap && find_partial(partials, sf, which_partial, layer_unmaps, LLONG_MAX)) {
preserve_attributes(arg->attribute_accum, sf, stringpool, pool_off, partials[which_partial]);
strategy->dropped_as_needed++;
continue;
}
} else if (additional[A_COALESCE_DENSEST_AS_NEEDED]) {
indices.push_back(sf.index);
if (indices.size() < MAX_INDICES) {
indices.push_back(sf.index);
}
if (sf.index - merge_previndex < mingap && find_partial(partials, sf, which_partial, layer_unmaps, LLONG_MAX)) {
partials[which_partial].geoms.push_back(sf.geometry);
partials[which_partial].coalesced = true;
Expand All @@ -2057,14 +2095,14 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
continue;
}
} else if (additional[A_DROP_SMALLEST_AS_NEEDED]) {
extents.push_back(sf.extent);
add_sample_to(extents, sf.extent, extents_increment, seq);
if (sf.extent + coalesced_area <= minextent && find_partial(partials, sf, which_partial, layer_unmaps, minextent)) {
preserve_attributes(arg->attribute_accum, sf, stringpool, pool_off, partials[which_partial]);
strategy->dropped_as_needed++;
continue;
}
} else if (additional[A_COALESCE_SMALLEST_AS_NEEDED]) {
extents.push_back(sf.extent);
add_sample_to(extents, sf.extent, extents_increment, seq);
if (sf.extent + coalesced_area <= minextent && find_partial(partials, sf, which_partial, layer_unmaps, minextent)) {
partials[which_partial].geoms.push_back(sf.geometry);
partials[which_partial].coalesced = true;
Expand Down Expand Up @@ -2109,6 +2147,9 @@ long long write_tile(FILE *geoms, std::atomic<long long> *geompos_in, char *meta
if (reduced) {
strategy->tiny_polygons++;
}
if (sf.geometry.size() == 0) {
continue;
}
}
}
if (sf.t == VT_POLYGON || sf.t == VT_LINE) {
Expand Down
2 changes: 1 addition & 1 deletion version.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#ifndef VERSION_HPP
#define VERSION_HPP

#define VERSION "v2.15.2"
#define VERSION "v2.16.0"

#endif

0 comments on commit 0799297

Please sign in to comment.