package com.conveyal.osmlib;

+ import com.conveyal.osmlib.serializer.NodeSerializer;
+ import com.conveyal.osmlib.serializer.WaySerializer;
import com.google.common.base.Charsets;
import com.google.common.primitives.Longs;
import com.vividsolutions.jts.geom.Envelope;
import org.mapdb.BTreeKeySerializer;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.Fun.Tuple3;
- import com.conveyal.osmlib.serializer.NodeSerializer;
- import com.conveyal.osmlib.serializer.WaySerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * OTP representation of a subset of OpenStreetMap. One or more PBF files can be loaded into this
 * object, which serves as a simplistic database for fetching and iterating over OSM elements.
+ * Using DB TreeMaps is often no slower than holding the data in memory. HashMaps are both bigger and
+ * slower, probably because our keys are so small: a HashMap must store both the long key and its hash.
 */
public class OSM {

@@ -34,18 +36,23 @@ public class OSM {
    /** The MapDB backing this OSM, if any. */
    DB db = null;

-   // Using DB TreeMaps is observed not to be slower than memory. HashMaps are both bigger and slower.
-
-   /** If diskPath is null, OSM will be loaded into memory. */
+   /**
+    * Construct a new MapDB-based random-access OSM data store.
+    * If diskPath is null, the OSM will be stored in a temporary file that is deleted on shutdown.
+    * If diskPath is the string "__MEMORY__", the OSM will be stored entirely in memory.
+    *
+    * @param diskPath the file in which to save the data, null for a temp file, or "__MEMORY__" for in-memory
+    */
    public OSM (String diskPath) {
        DBMaker dbMaker;
        if (diskPath == null) {
            LOG.info("OSM will be stored in a temporary file.");
-           dbMaker = DBMaker.newTempFileDB();
+           dbMaker = DBMaker.newTempFileDB().deleteFilesAfterClose();
        } else {
            if (diskPath.equals("__MEMORY__")) {
                LOG.info("OSM will be stored in memory.");
-               dbMaker = DBMaker.newMemoryDirectDB(); // off-heap, no garbage collection
+               // 'direct' means off-heap memory, with no garbage collection overhead
+               dbMaker = DBMaker.newMemoryDirectDB();
            } else {
                LOG.info("OSM will be stored in file {}.", diskPath);
                dbMaker = DBMaker.newFileDB(new File(diskPath));
@@ -57,70 +64,71 @@ public OSM (String diskPath) {
        db = dbMaker.asyncWriteEnable()
                .transactionDisable()
                .compressionEnable()
-               .cacheSize(100 * 1024 * 1024)
+               .cacheSize(50 * 1024 * 1024)
                .mmapFileEnableIfSupported()
                .closeOnJvmShutdown()
                .make();
+
+       nodes = db.createTreeMap("nodes")
+               .valueSerializer(new NodeSerializer())
+               .makeLongMap();

-       try {
-           nodes = db.createTreeMap("nodes")
-                   .valueSerializer(new NodeSerializer())
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           nodes = db.getTreeMap("nodes");
-       }
+       ways = db.createTreeMap("ways")
+               .valueSerializer(new WaySerializer())
+               .makeLongMap();

-       try {
-           ways = db.createTreeMap("ways")
-                   .valueSerializer(new WaySerializer())
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           ways = db.getTreeMap("ways");
-       }
-
-       try {
-           relations = db.createTreeMap("relations")
-                   .makeLongMap();
-       } catch (IllegalArgumentException ex) {
-           relations = db.getTreeMap("relations");
-       }
+       relations = db.createTreeMap("relations")
+               .makeLongMap();

-       // Serializer delta compresses the tuple as a whole and packs ints,
+       // Serializer delta-compresses the tuple as a whole and variable-width packs ints,
        // but does not recursively delta-code its elements.
-       try {
-           index = db.createTreeSet("spatial_index")
-                   .serializer(BTreeKeySerializer.TUPLE3)
-                   .make();
-       } catch (IllegalArgumentException ex) {
-           index = db.getTreeSet("spatial_index");
-       }
+       index = db.createTreeSet("spatial_index")
+               .serializer(BTreeKeySerializer.TUPLE3)
+               .make();
    }
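
For reference, a minimal usage sketch of the three storage modes selected by diskPath (the paths and file names are hypothetical, and the snippet assumes the imports already present in this file):

    OSM tempBacked = new OSM(null);             // backed by a temp file, deleted when the JVM exits
    OSM inMemory   = new OSM("__MEMORY__");     // held entirely in off-heap memory
    OSM onDisk     = new OSM("/tmp/osm.mapdb"); // persistent MapDB file at the given (example) path
    onDisk.loadFromPBFFile("some-region.osm.pbf"); // hypothetical file name; method added below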

-   // boolean filterTags
-   public static OSM fromPBF (String pbfFile) {
-       LOG.info("Reading entire PBF file '{}'", pbfFile);
-       Parser parser = new Parser();
-       parser.parse(pbfFile);
-       return parser.osm;
+   // boolean to filter entities on tags, or a list of tag keys to retain?
+   public void loadFromPBFStream (InputStream in) {
+       LOG.info("Reading PBF stream.");
+       Parser parser = new Parser(this);
+       parser.parse(in);
+   }
+
+   public void loadFromPBFFile (String filePath) {
+       try {
+           LOG.info("Reading PBF from file '{}'.", filePath);
+           Parser parser = new Parser(this);
+           parser.parse(new FileInputStream(filePath));
+       } catch (FileNotFoundException e) {
+           LOG.error("Error occurred while parsing PBF file '{}'", filePath);
+           e.printStackTrace();
+       }
    }

-   public static OSM fromPBF (String pbfFile, Envelope env) {
+   // TODO we know that the ordering of entity types within a PBF file is almost always nodes, then ways,
+   // then relations. Here we are doing three passes over the whole file, but we could get away with one
+   // and a half by combining the NodeGeomFilter and the WayLoader, then bailing out of the NodeLoader
+   // as soon as it sees a Way.
+   // In any case we can't spatially filter PBF data coming from a stream because we'd need to backtrack.
+   public void loadFromPBF (String pbfFile, Envelope env) {
        LOG.info("Reading PBF file '{}' filtering with envelope {}", pbfFile, env);
-       OSM osm = new OSM("/var/vex/osm");
        LOG.info("Finding nodes within the bounding geometry.");
        NodeGeomFilter ngf = new NodeGeomFilter(env);
        ngf.parse(pbfFile);
+       LOG.info("LOAD RELATIONS HERE");
        LOG.info("Loading ways containing nodes found within the bounding geometry.");
-       WayLoader wl = new WayLoader(osm, ngf.nodesInGeom);
+       WayLoader wl = new WayLoader(this, ngf.nodesInGeom);
        wl.parse(pbfFile);
        LOG.info("Loading nodes used in the retained ways.");
-       NodeLoader nl = new NodeLoader(osm, wl.nodesInWays);
+       NodeLoader nl = new NodeLoader(this, wl.nodesInWays);
        nl.parse(pbfFile);
-       LOG.info("Loading relations (which ones?)");
-       return osm;
    }
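
A minimal sketch of how the envelope-filtered load above might be used, assuming the filter treats the envelope's x range as longitude and its y range as latitude (the coordinates and file name are examples only):

    OSM osm = new OSM(null);                                 // temp-file-backed store
    Envelope bbox = new Envelope(-74.1, -73.9, 40.5, 40.9);  // JTS Envelope(x1, x2, y1, y2)
    osm.loadFromPBF("new-york-latest.osm.pbf", bbox);        // three passes over the PBF file, as noted above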

-   /** Decode OSM gzipped text format produced by Vanilla Extract. */
+   /**
+    * Decode OSM gzipped text format produced by Vanilla Extract.
+    * It remains to be determined whether this VEX text format is better or worse than the slightly
+    * more complicated VEX binary format, but it's certainly simpler and cleaner than PBF.
+    */
    public void loadFromVexStream (InputStream vexStream) throws IOException {
        InputStream unzippedStream = new GZIPInputStream(vexStream);
        Reader decoded = new InputStreamReader(unzippedStream, Charsets.UTF_8); // UTF8 ENCODING is important
@@ -161,4 +169,4 @@ public void loadFromVexStream (InputStream vexStream) throws IOException {
        }
        bufferedReader.close();
    }
- }
+ }
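
A usage sketch for the VEX text loader: loadFromVexStream wraps the given stream in a GZIPInputStream itself, so the caller passes the raw gzipped stream. The file name and the choice of in-memory storage are illustrative only.

    OSM osm = new OSM("__MEMORY__");
    try (InputStream in = new FileInputStream("extract.vex.gz")) { // hypothetical file name
        osm.loadFromVexStream(in); // caller must handle or declare IOException
    }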