Skip to content

Commit

Permalink
index geobuf in protobuf format, supports id query, bbox query (#27)
Browse files Browse the repository at this point in the history
* some TODO

* explicit header_size

* add draft for geobuf_index proto

* no default

* geobuf index

* not ready

* add js decoding

* fix

* lint code

* add attrs

* not ready

* fix

* ready to release

* fix

---------

Co-authored-by: TANG ZHIXIONG <[email protected]>
  • Loading branch information
district10 and zhixiong-tang authored Nov 18, 2023
1 parent 3abc3aa commit b06e682
Show file tree
Hide file tree
Showing 14 changed files with 466 additions and 150 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ benchmarks/*.pbf*
data/suzhoubeizhan.json
tests/*.json
data/suzhoubeizhan.idx
package-lock.json
node_modules
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ cli_test4:

.PHONY: cli_test cli_test1 cli_test2 cli_test3

geobuf_index.js: geobuf_index.proto
pbf $< > $@

# conda create -y -n py36 python=3.6
# conda create -y -n py37 python=3.7
# conda create -y -n py38 python=3.8
Expand Down
15 changes: 15 additions & 0 deletions bin/index2json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env node

// Decode a geobuf index file (schema: geobuf_index.proto) and print it to
// stdout as JSON.
//
// Usage:
//   index2json path/to/geobuf.idx
//   cat path/to/geobuf.idx | index2json
//
// Every binding below is part of a single comma-separated `var` list. A
// missing comma would end the declaration early and turn the following
// assignment into an implicit global.
var Index = require('../geobuf_index.js').Index,
    Pbf = require('pbf'),
    fs = require('fs'),
    concat = require('concat-stream');

// When stdin is a terminal, read the file named by the first CLI argument;
// otherwise consume whatever is piped into stdin.
var input = process.stdin.isTTY ? fs.createReadStream(process.argv[2]) : process.stdin;

// Buffer the whole input, decode it as an Index message, and emit the JSON text.
input.pipe(concat(function (buf) {
    var pbf = new Pbf(buf);
    var obj = Index.read(pbf);
    var data = JSON.stringify(obj);
    // Buffer.allocUnsafe is used as a feature test: when it exists Buffer.from
    // is used, otherwise fall back to the Buffer constructor.
    process.stdout.write(Buffer.allocUnsafe ? Buffer.from(data) : new Buffer(data));
}));
4 changes: 4 additions & 0 deletions docs/about/release-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ To upgrade `pybind11-geobuf` to the latest version, use pip:
pip install -U pybind11-geobuf
```

## Version 0.2.0 (2023-11-18)

* Indexing geobuf in protobuf format; Spec: geobuf_index.proto

## Version 0.1.9 (2023-11-15)

* Indexing geobuf (like flatgeobuf, but more general), making it random accessible
Expand Down
53 changes: 53 additions & 0 deletions geobuf_index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
'use strict'; // code generated by pbf v3.2.1

// Index ========================================

/**
 * Reader/writer pair for the top-level `Index` message, exported as
 * `exports.Index`.
 */
var Index = exports.Index = {};

/**
 * Reads an Index message from `pbf`.
 *
 * @param {Object} pbf - reader exposing `readFields`
 * @param {number} [end] - forwarded unchanged to `pbf.readFields`
 * @returns {*} whatever `pbf.readFields` returns for the default-initialised object
 */
Index.read = function (pbf, end) {
    var index = {
        header_size: 0,
        num_features: 0,
        offsets: [],
        fids: [],
        idxs: [],
        packed_rtree: null
    };
    return pbf.readFields(Index._readField, index, end);
};

/**
 * Field callback: stores the value for `tag` on `obj`. Unknown tags are ignored.
 *
 * @param {number} tag - protobuf field number
 * @param {Object} obj - Index object being populated
 * @param {Object} pbf - reader positioned at the field's value
 */
Index._readField = function (tag, obj, pbf) {
    switch (tag) {
    case 1:
        obj.header_size = pbf.readVarint();
        break;
    case 2:
        obj.num_features = pbf.readVarint();
        break;
    case 3:
        pbf.readPackedVarint(obj.offsets);
        break;
    case 4:
        obj.fids.push(pbf.readString());
        break;
    case 5:
        pbf.readPackedVarint(obj.idxs);
        break;
    case 8:
        // Embedded message: the length varint is read first, then added to
        // the reader position *after* that read to get the end offset.
        obj.packed_rtree = Index.PackedRTree.read(pbf, pbf.readVarint() + pbf.pos);
        break;
    }
};

/**
 * Writes `obj` as an Index message into `pbf`. Falsy fields are skipped.
 *
 * @param {Object} obj - Index object to serialise
 * @param {Object} pbf - writer exposing the `write*Field` helpers
 */
Index.write = function (obj, pbf) {
    if (obj.header_size) {
        pbf.writeVarintField(1, obj.header_size);
    }
    if (obj.num_features) {
        pbf.writeVarintField(2, obj.num_features);
    }
    if (obj.offsets) {
        pbf.writePackedVarint(3, obj.offsets);
    }
    if (obj.fids) {
        // Repeated string field: one tagged entry per element.
        for (var k = 0; k < obj.fids.length; k++) {
            pbf.writeStringField(4, obj.fids[k]);
        }
    }
    if (obj.idxs) {
        pbf.writePackedVarint(5, obj.idxs);
    }
    if (obj.packed_rtree) {
        pbf.writeMessage(8, Index.PackedRTree.write, obj.packed_rtree);
    }
};

// Index.PackedRTree ========================================

/**
 * Reader/writer pair for the nested `Index.PackedRTree` message.
 */
Index.PackedRTree = {};

/**
 * Reads a PackedRTree message from `pbf`.
 *
 * @param {Object} pbf - reader exposing `readFields`
 * @param {number} [end] - forwarded unchanged to `pbf.readFields`
 * @returns {*} whatever `pbf.readFields` returns for the default-initialised object
 */
Index.PackedRTree.read = function (pbf, end) {
    var tree = {
        left: 0,
        bottom: 0,
        right: 0,
        top: 0,
        num_items: 0,
        num_nodes: 0,
        node_size: 0,
        serialized: null
    };
    return pbf.readFields(Index.PackedRTree._readField, tree, end);
};

/**
 * Field callback: stores the value for `tag` on `obj`. Unknown tags are ignored.
 *
 * @param {number} tag - protobuf field number
 * @param {Object} obj - PackedRTree object being populated
 * @param {Object} pbf - reader positioned at the field's value
 */
Index.PackedRTree._readField = function (tag, obj, pbf) {
    switch (tag) {
    case 1:
        obj.left = pbf.readDouble();
        break;
    case 2:
        obj.bottom = pbf.readDouble();
        break;
    case 3:
        obj.right = pbf.readDouble();
        break;
    case 4:
        obj.top = pbf.readDouble();
        break;
    case 5:
        obj.num_items = pbf.readVarint();
        break;
    case 6:
        obj.num_nodes = pbf.readVarint();
        break;
    case 7:
        obj.node_size = pbf.readVarint();
        break;
    case 8:
        obj.serialized = pbf.readBytes();
        break;
    }
};

/**
 * Writes `obj` as a PackedRTree message into `pbf`. Falsy fields are skipped.
 *
 * @param {Object} obj - PackedRTree object to serialise
 * @param {Object} pbf - writer exposing the `write*Field` helpers
 */
Index.PackedRTree.write = function (obj, pbf) {
    // Field names listed in tag order: doubles occupy tags 1-4, varints 5-7.
    var doubleFields = ['left', 'bottom', 'right', 'top'];
    var varintFields = ['num_items', 'num_nodes', 'node_size'];
    var k;

    for (k = 0; k < doubleFields.length; k++) {
        if (obj[doubleFields[k]]) {
            pbf.writeDoubleField(k + 1, obj[doubleFields[k]]);
        }
    }
    for (k = 0; k < varintFields.length; k++) {
        if (obj[varintFields[k]]) {
            pbf.writeVarintField(k + 5, obj[varintFields[k]]);
        }
    }
    if (obj.serialized) {
        pbf.writeBytesField(8, obj.serialized);
    }
};
38 changes: 38 additions & 0 deletions geobuf_index.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
option optimize_for = LITE_RUNTIME;

// Index for a geobuf file. In the comments below, "Data" is the byte buffer
// of the indexed geobuf file and slices use Python-style [start:end] notation.
message Index {
// Data[0:header_size] can be parsed to initialise the decoder's keys/dim/e.
uint32 header_size = 1;
// Number of features covered by this index.
uint32 num_features = 2;
// Data[offsets[i]:offsets[i+1]] can be parsed to get the i-th feature.
repeated uint64 offsets = 3 [packed = true];
// len(offsets) == num_features + 2; Data[offsets[-2]:offsets[-1]] can be
// parsed to get feature_collection.custom_properties.

// Feature ids. Candidate sources for a feature's id (open question):
// - feature.id
// - feature.properties.id
// - feature.properties.fid
// - feature.properties.feature_id
// Related docs:
// - https://docs.mapbox.com/mapbox-gl-js/api/map/#instance-members-feature-state
// - https://github.com/mapbox/mapbox-gl-js/pull/8987
repeated string fids = 4;
// NOTE(review): presumably idxs[i] is the feature index that fids[i] refers
// to -- confirm against the encoder.
repeated uint32 idxs = 5 [packed = true];

// Spatial index.
optional PackedRTree packed_rtree = 8;
message PackedRTree {
// NOTE(review): left/bottom/right/top look like the bounding box covered by
// the tree -- confirm against the encoder.
double left = 1;
double bottom = 2;
double right = 3;
double top = 4;
uint32 num_items = 5;
uint32 num_nodes = 6;
uint32 node_size = 7;
// Serialized tree payload as opaque bytes.
bytes serialized = 8;
}

// Tools
// - encoding:
//     python3 -m pybind11_geobuf index_geobuf path/to/geobuf.pbf output/geobuf.idx
// - inspecting an index:
//     python3 -m pybind11_geobuf pbf_decode geobuf.idx
//     node bin/index2json geobuf.idx    # requires `npm i pbf concat-stream`
}
32 changes: 32 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"name": "geobuf_index",
"version": "0.0.1",
"description": "index for geobuf",
"main": "index.js",
"bin": {
"debug_index": "bin/index2json"
},
"scripts": {},
"repository": {
"type": "git",
"url": "[email protected]:cubao/geobuf_cpp.git"
},
"files": [
"index.js",
"bin",
"geobuf_index.proto"
],
"keywords": [
"geobuf",
"index"
],
"license": "ISC",
"bugs": {
"url": "https://github.com/cubao/geobuf_cpp/issues"
},
"homepage": "https://github.com/cubao/geobuf_cpp",
"dependencies": {
"concat-stream": "^2.0.0",
"pbf": "^3.2.1"
}
}
15 changes: 13 additions & 2 deletions pybind11_geobuf/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,23 @@ def is_subset_of(path1: str, path2: str):
assert is_subset_of_impl(path1, path2)


def index_geobuf(input_geobuf_path: str, output_index_path: str):
def index_geobuf(
input_geobuf_path: str,
output_index_path: str,
*,
feature_id: Optional[str] = "@",
packed_rtree: Optional[str] = "@",
):
os.makedirs(
os.path.dirname(os.path.abspath(output_index_path)),
exist_ok=True,
)
return GeobufIndex.indexing(input_geobuf_path, output_index_path)
return GeobufIndex.indexing(
input_geobuf_path,
output_index_path,
feature_id=feature_id,
packed_rtree=packed_rtree,
)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def build_extension(self, ext):
# logic and declaration, and simpler if you include description/version in a file.
setup(
name="pybind11_geobuf",
version="0.1.9",
version="0.2.0",
author="tzx",
author_email="[email protected]",
url="https://geobuf-cpp.readthedocs.io",
Expand Down
17 changes: 10 additions & 7 deletions src/geobuf/geobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <iostream>
#include <sstream>

#include "spdlog/spdlog.h"
#include "spdlog/spdlog.h" // fmt::format
// fix exposed macro 'GetObject' from wingdi.h (included by spdlog.h) under
// windows, see https://github.com/Tencent/rapidjson/issues/1448
#ifdef GetObject
Expand Down Expand Up @@ -698,10 +698,13 @@ mapbox::geojson::geojson Decoder::decode(const uint8_t *data, size_t size)
const auto tag = pbf.tag();
if (tag == 1) {
keys.push_back(pbf.get_string());
header_size = pbf.data().data() - head;
} else if (tag == 2) {
dim = pbf.get_uint32();
header_size = pbf.data().data() - head;
} else if (tag == 3) {
e = std::pow(10, pbf.get_uint32());
header_size = pbf.data().data() - head;
} else if (tag == 4) {
protozero::pbf_reader pbf_fc = pbf.get_message();
return readFeatureCollection(pbf_fc);
Expand Down Expand Up @@ -822,9 +825,9 @@ mapbox::geojson::feature_collection Decoder::readFeatureCollection(Pbf &pbf)
mapbox::geojson::feature_collection fc;
std::vector<mapbox::geojson::value> values;
offsets.clear();
int props_cursor = -1;
std::optional<uint64_t> props_cursor;
while (true) {
int cursor = pbf.data().data() - head;
uint64_t cursor = pbf.data().data() - head;
if (!pbf.next()) {
break;
}
Expand All @@ -835,7 +838,7 @@ mapbox::geojson::feature_collection Decoder::readFeatureCollection(Pbf &pbf)
offsets.push_back(cursor);
continue;
}
if (props_cursor < 0) {
if (!props_cursor) {
props_cursor = cursor;
}
if (tag == 13) {
Expand All @@ -856,9 +859,9 @@ mapbox::geojson::feature_collection Decoder::readFeatureCollection(Pbf &pbf)
}
}
// props start
int tail = pbf.data().data() - head;
if (props_cursor > 0 && !offsets.empty() && props_cursor > offsets.back()) {
offsets.push_back(props_cursor);
uint64_t tail = pbf.data().data() - head;
if (props_cursor && !offsets.empty() && *props_cursor > offsets.back()) {
offsets.push_back(*props_cursor);
} else {
offsets.push_back(tail);
}
Expand Down
6 changes: 4 additions & 2 deletions src/geobuf/geobuf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ struct Decoder
int precision() const { return std::log10(e); }
int __dim() const { return dim; }
std::vector<std::string> __keys() const { return keys; }
std::vector<int> __offsets() const { return offsets; }
uint32_t __header_size() const { return header_size; }
std::vector<uint64_t> __offsets() const { return offsets; }

private:
mapbox::geojson::feature_collection readFeatureCollection(Pbf &pbf);
Expand All @@ -211,7 +212,8 @@ struct Decoder
std::vector<std::string> keys;

const char *head = nullptr;
std::vector<int> offsets;
uint32_t header_size = -1;
std::vector<uint64_t> offsets;
};

} // namespace geobuf
Expand Down
Loading

0 comments on commit b06e682

Please sign in to comment.