package com.conveyal.osmlib;

+ import com.conveyal.osmlib.serializer.NodeSerializer;
+ import com.conveyal.osmlib.serializer.WaySerializer;
import com.google.common.base.Charsets;
import com.google.common.primitives.Longs;
import com.vividsolutions.jts.geom.Envelope;
import org.mapdb.BTreeKeySerializer;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.Fun.Tuple3;
- import com.conveyal.osmlib.serializer.NodeSerializer;
- import com.conveyal.osmlib.serializer.WaySerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * OTP representation of a subset of OpenStreetMap. One or more PBF files can be loaded into this
 * object, which serves as a simplistic database for fetching and iterating over OSM elements.
+ * Using DB TreeMaps is often no slower than holding the data in memory. HashMaps are both bigger and
+ * slower, probably because our keys are so small: a HashMap must store both the long key and its hash.
 */
public class OSM {

@@ -34,18 +36,23 @@ public class OSM {
    /** The MapDB backing this OSM, if any. */
    DB db = null;

-   // Using DB TreeMaps is observed not to be slower than memory. HashMaps are both bigger and slower.
-
-   /** If diskPath is null, OSM will be loaded into memory. */
+   /**
+    * Construct a new MapDB-based random-access OSM data store.
+    * If diskPath is null, the OSM will be stored in a temporary file that is deleted on shutdown.
+    * If diskPath is the string "__MEMORY__", the OSM will be stored entirely in memory.
+    *
+    * @param diskPath the file in which to save the data, null for a temp file, or "__MEMORY__" for in-memory
+    */
    public OSM (String diskPath) {
        DBMaker dbMaker;
        if (diskPath == null) {
            LOG.info("OSM will be stored in a temporary file.");
-           dbMaker = DBMaker.newTempFileDB();
+           dbMaker = DBMaker.newTempFileDB().deleteFilesAfterClose();
        } else {
            if (diskPath.equals("__MEMORY__")) {
                LOG.info("OSM will be stored in memory.");
-               dbMaker = DBMaker.newMemoryDirectDB(); // off-heap, no garbage collection
+               // 'direct' means off-heap memory, with no garbage collection overhead
+               dbMaker = DBMaker.newMemoryDirectDB();
            } else {
                LOG.info("OSM will be stored in file {}.", diskPath);
                dbMaker = DBMaker.newFileDB(new File(diskPath));
@@ -57,70 +64,71 @@ public OSM (String diskPath) {
        db = dbMaker.asyncWriteEnable()
                .transactionDisable()
                .compressionEnable()
-               .cacheSize(100 * 1024 * 1024)
+               .cacheSize(50 * 1024 * 1024)
                .mmapFileEnableIfSupported()
                .closeOnJvmShutdown()
                .make();
+
+       nodes = db.createTreeMap("nodes")
+               .valueSerializer(new NodeSerializer())
+               .makeLongMap();

-       try {
-           nodes = db.createTreeMap("nodes")
-                   .valueSerializer(new NodeSerializer())
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           nodes = db.getTreeMap("nodes");
-       }
+       ways = db.createTreeMap("ways")
+               .valueSerializer(new WaySerializer())
+               .makeLongMap();

-       try {
-           ways = db.createTreeMap("ways")
-                   .valueSerializer(new WaySerializer())
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           ways = db.getTreeMap("ways");
-       }
-
-       try {
-           relations = db.createTreeMap("relations")
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           relations = db.getTreeMap("relations");
-       }
+       relations = db.createTreeMap("relations")
+               .makeLongMap();

-       // Serializer delta compresses the tuple as a whole and packs ints,
+       // Serializer delta-compresses the tuple as a whole and variable-width packs ints,
        // but does not recursively delta-code its elements.
-       try {
-           index = db.createTreeSet("spatial_index")
-                   .serializer(BTreeKeySerializer.TUPLE3)
-                   .make();
-       } catch (IllegalArgumentException ex) {
-           index = db.getTreeSet("spatial_index");
-       }
+       index = db.createTreeSet("spatial_index")
+               .serializer(BTreeKeySerializer.TUPLE3)
+               .make();
    }
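
For reference, a minimal usage sketch of the three storage modes selected by diskPath (the paths and file names are hypothetical, and the snippet assumes the imports already present in this file):

    OSM tempBacked = new OSM(null);             // backed by a temp file, deleted when the JVM exits
    OSM inMemory   = new OSM("__MEMORY__");     // held entirely in off-heap memory
    OSM onDisk     = new OSM("/tmp/osm.mapdb"); // persistent MapDB file at the given (example) path
    onDisk.loadFromPBFFile("some-region.osm.pbf"); // hypothetical file name; method added below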

-   // boolean filterTags
-   public static OSM fromPBF (String pbfFile) {
-       LOG.info("Reading entire PBF file '{}'", pbfFile);
-       Parser parser = new Parser();
-       parser.parse(pbfFile);
-       return parser.osm;
+   // boolean to filter entities on tags, or a list of tag keys to retain?
+   public void loadFromPBFStream (InputStream in) {
+       LOG.info("Reading PBF stream.");
+       Parser parser = new Parser(this);
+       parser.parse(in);
+   }
+
+   public void loadFromPBFFile (String filePath) {
+       try {
+           LOG.info("Reading PBF from file '{}'.", filePath);
+           Parser parser = new Parser(this);
+           parser.parse(new FileInputStream(filePath));
+       } catch (FileNotFoundException e) {
+           LOG.error("Error occurred while parsing PBF file '{}'", filePath);
+           e.printStackTrace();
+       }
    }

-   public static OSM fromPBF (String pbfFile, Envelope env) {
+   // TODO we know that the ordering of entity types within a PBF file is almost always nodes, then ways,
+   // then relations. Here we are doing three passes over the whole file, but we could get away with one
+   // and a half by combining the NodeGeomFilter and the WayLoader, then bailing out of the NodeLoader
+   // as soon as it sees a Way.
+   // In any case we can't spatially filter PBF data coming from a stream because we'd need to backtrack.
+   public void loadFromPBF (String pbfFile, Envelope env) {
        LOG.info("Reading PBF file '{}' filtering with envelope {}", pbfFile, env);
-       OSM osm = new OSM("/var/vex/osm");
        LOG.info("Finding nodes within the bounding geometry.");
        NodeGeomFilter ngf = new NodeGeomFilter(env);
        ngf.parse(pbfFile);
+       LOG.info("LOAD RELATIONS HERE");
        LOG.info("Loading ways containing nodes found within the bounding geometry.");
-       WayLoader wl = new WayLoader(osm, ngf.nodesInGeom);
+       WayLoader wl = new WayLoader(this, ngf.nodesInGeom);
        wl.parse(pbfFile);
        LOG.info("Loading nodes used in the retained ways.");
-       NodeLoader nl = new NodeLoader(osm, wl.nodesInWays);
+       NodeLoader nl = new NodeLoader(this, wl.nodesInWays);
        nl.parse(pbfFile);
-       LOG.info("Loading relations (which ones?)");
-       return osm;
    }
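
A minimal sketch of how the envelope-filtered load above might be used, assuming the filter treats the envelope's x range as longitude and its y range as latitude (the coordinates and file name are examples only):

    OSM osm = new OSM(null);                                 // temp-file-backed store
    Envelope bbox = new Envelope(-74.1, -73.9, 40.5, 40.9);  // JTS Envelope(x1, x2, y1, y2)
    osm.loadFromPBF("new-york-latest.osm.pbf", bbox);        // three passes over the PBF file, as noted above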

-   /** Decode OSM gzipped text format produced by Vanilla Extract. */
+   /**
+    * Decode OSM gzipped text format produced by Vanilla Extract.
+    * It remains to be determined whether this VEX text format is better or worse than the slightly
+    * more complicated VEX binary format, but it's certainly simpler and cleaner than PBF.
+    */
    public void loadFromVexStream (InputStream vexStream) throws IOException {
        InputStream unzippedStream = new GZIPInputStream(vexStream);
        Reader decoded = new InputStreamReader(unzippedStream, Charsets.UTF_8); // UTF8 ENCODING is important
@@ -161,4 +169,4 @@ public void loadFromVexStream (InputStream vexStream) throws IOException {
        }
        bufferedReader.close();
    }
- }
+ }
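
A usage sketch for the VEX text loader: loadFromVexStream wraps the given stream in a GZIPInputStream itself, so the caller passes the raw gzipped stream. The file name and the choice of in-memory storage are illustrative only.

    OSM osm = new OSM("__MEMORY__");
    try (InputStream in = new FileInputStream("extract.vex.gz")) { // hypothetical file name
        osm.loadFromVexStream(in); // caller must handle or declare IOException
    }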