Skip to content

Commit 45e4c52

Browse files
committed
feat(validation): add some more validation of shapes
Refs #167
1 parent ba9e195 commit 45e4c52

17 files changed

+124
-12
lines changed

src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ public enum NewGTFSErrorType {
4444
ROUTE_SHORT_NAME_TOO_LONG(Priority.MEDIUM, "The short name of a route is too long for display in standard GTFS consumer applications."),
4545
SERVICE_NEVER_ACTIVE(Priority.MEDIUM, "A service code was defined, but is never active on any date."),
4646
SERVICE_UNUSED(Priority.MEDIUM, "A service code was defined, but is never referenced by any trips."),
47-
SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase with stop times."),
47+
SHAPE_UNUSED(Priority.LOW, "A shape was defined, but is never referenced by any trips."),
48+
SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase in shapes."),
49+
STOP_TIME_SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase in stop times."),
4850
STOP_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a stop is identical to its name, so does not add any information."),
4951
STOP_LOW_POPULATION_DENSITY(Priority.HIGH, "A stop is located in a geographic area with very low human population density."),
5052
STOP_NAME_MISSING(Priority.MEDIUM, "A stop does not have a name."),

src/main/java/com/conveyal/gtfs/loader/Feed.java

+12-10
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ public class Feed {
3232
// This may be the empty string if the feed is stored in the root ("public") schema.
3333
public final String tablePrefix;
3434

35-
public final TableReader<Agency> agencies;
36-
public final TableReader<Calendar> calendars;
37-
public final TableReader<CalendarDate> calendarDates;
35+
public final JDBCTableReader<Agency> agencies;
36+
public final JDBCTableReader<Calendar> calendars;
37+
public final JDBCTableReader<CalendarDate> calendarDates;
3838
// public final TableReader<Fare> fares;
39-
public final TableReader<Route> routes;
40-
public final TableReader<Stop> stops;
41-
public final TableReader<Trip> trips;
42-
// public final TableReader<ShapePoint> shapePoints;
43-
public final TableReader<StopTime> stopTimes;
39+
public final JDBCTableReader<Route> routes;
40+
public final JDBCTableReader<Stop> stops;
41+
public final JDBCTableReader<Trip> trips;
42+
public final JDBCTableReader<ShapePoint> shapePoints;
43+
public final JDBCTableReader<StopTime> stopTimes;
4444

4545
/* A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. */
4646
// TODO remove this and use only NewGTFSErrors in Validators, loaded into a JDBC table
@@ -63,7 +63,7 @@ public Feed (DataSource dataSource, String tablePrefix) {
6363
routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE);
6464
stops = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP);
6565
trips = new JDBCTableReader(Table.TRIPS, dataSource, tablePrefix, EntityPopulator.TRIP);
66-
// shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
66+
shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
6767
stopTimes = new JDBCTableReader(Table.STOP_TIMES, dataSource, tablePrefix, EntityPopulator.STOP_TIME);
6868
}
6969

@@ -91,7 +91,9 @@ public ValidationResult validate () {
9191
new DuplicateStopsValidator(this, errorStorage),
9292
new TimeZoneValidator(this, errorStorage),
9393
new NewTripTimesValidator(this, errorStorage),
94-
new NamesValidator(this, errorStorage));
94+
new NamesValidator(this, errorStorage),
95+
new ShapeValidator(this, errorStorage)
96+
);
9597

9698
for (FeedValidator feedValidator : feedValidators) {
9799
String validatorName = feedValidator.getClass().getSimpleName();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package com.conveyal.gtfs.validator;
2+
3+
import com.conveyal.gtfs.error.SQLErrorStorage;
4+
import com.conveyal.gtfs.loader.Feed;
5+
import com.conveyal.gtfs.model.ShapePoint;
6+
import com.conveyal.gtfs.model.Trip;
7+
8+
import java.util.HashMap;
9+
import java.util.HashSet;
10+
import java.util.Map;
11+
import java.util.Set;
12+
13+
import static com.conveyal.gtfs.error.NewGTFSErrorType.SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING;
14+
import static com.conveyal.gtfs.error.NewGTFSErrorType.SHAPE_UNUSED;
15+
16+
/**
17+
* A validator that checks the integrity of the shapes records
18+
*/
19+
public class ShapeValidator extends FeedValidator {
20+
public ShapeValidator(Feed feed, SQLErrorStorage errorStorage) {
21+
super(feed, errorStorage);
22+
}
23+
24+
@Override
25+
public void validate() {
26+
ShapePoint lastShapePoint = null;
27+
Map<String, ShapePoint> firstShapePointByShapeId = new HashMap<>();
28+
// this stores all shape ids found in the shapes initially, but will eventually be modified to only have the
29+
// extra shape ids if there are any
30+
Set<String> extraShapeIds = new HashSet<>();
31+
32+
for (ShapePoint shapePoint : feed.shapePoints) {
33+
// store the first found shapePoint when a new shape_id is found
34+
if (shapePoint.shape_id != null && !firstShapePointByShapeId.containsKey(shapePoint.shape_id)) {
35+
firstShapePointByShapeId.put(shapePoint.shape_id, shapePoint);
36+
extraShapeIds.add(shapePoint.shape_id);
37+
}
38+
39+
// continue loop if first shape, or beginning analysis of new shape
40+
if (lastShapePoint == null || !lastShapePoint.shape_id.equals(shapePoint.shape_id)) {
41+
lastShapePoint = shapePoint;
42+
continue;
43+
}
44+
45+
// make sure the shape distance traveled is increasing
46+
if (lastShapePoint.shape_dist_traveled > shapePoint.shape_dist_traveled) {
47+
registerError(shapePoint, SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING, shapePoint.shape_dist_traveled);
48+
}
49+
50+
lastShapePoint = shapePoint;
51+
}
52+
53+
// verify that all found shapeIds exist in trips
54+
55+
// compile a list of shape_ids found in the trips table
56+
// Optimization idea: speed up by making custom SQL call to fetch distinct shape_ids from trip table
57+
Set<String> tripShapeIds = new HashSet<>();
58+
for (Trip trip : feed.trips) {
59+
tripShapeIds.add(trip.shape_id);
60+
}
61+
62+
// remove all trip shape ids from the found shape ids in the shapes table
63+
extraShapeIds.removeAll(tripShapeIds);
64+
65+
// iterate over the extra shape Ids and create errors for each
66+
for (String extraShapeId : extraShapeIds) {
67+
registerError(firstShapePointByShapeId.get(extraShapeId), SHAPE_UNUSED, extraShapeId);
68+
}
69+
}
70+
}

src/main/java/com/conveyal/gtfs/validator/SpeedTripValidator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ private void checkShapeDistTraveled(StopTime previous, StopTime current) {
100100
current.shape_dist_traveled <= previous.shape_dist_traveled
101101
)
102102
) {
103-
registerError(current, SHAPE_DIST_TRAVELED_NOT_INCREASING, current.shape_dist_traveled);
103+
registerError(current, STOP_TIME_SHAPE_DIST_TRAVELED_NOT_INCREASING, current.shape_dist_traveled);
104104
}
105105
}
106106

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url
2+
1,Fake Transit,,,,,America/Los_Angeles,,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
2+
04100312-8fe1-46a5-a9f2-556f39478f57,1,1,1,1,1,1,1,20170915,20170917
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
service_id,date,exception_type
2+
04100312-8fe1-46a5-a9f2-556f39478f57,20170916,2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
fare_id,price,currency_type,payment_method,transfers,transfer_duration
2+
route_based_fare,1.23,USD,0,0,0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
fare_id,route_id,origin_id,destination_id,contains_id
2+
route_based_fare,1,,,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
feed_publisher_name,feed_publisher_url,feed_lang,feed_version
2+
Conveyal,http://www.conveyal.com,en,1.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
trip_id,start_time,end_time,headway_secs,exact_times
2+
frequency-trip,08:00:00,09:00:00,1800,0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url
2+
1,1,1,Route 1,,3,,7CE6E7,FFFFFF,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
2+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0612132,-122.0074332,1,0.0000000
3+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0611720,-122.0075000,2,7.4997067
4+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0613590,-122.0076830,3,33.8739075
5+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0608780,-122.0082780,4,109.0402932
6+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0603590,-122.0088280,5,84.6078298
7+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0597610,-122.0093540,6,265.8053023
8+
5820f377-f947-4728-ac29-ac0102cbc34e,37.0590660,-122.0099190,7,357.8617018
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
2+
a30277f8-e50a-4a85-9141-b1e0da9d429d,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000,
3+
a30277f8-e50a-4a85-9141-b1e0da9d429d,07:01:00,07:01:00,johv,2,,0,0,341.4491961,
4+
frequency-trip,08:00:00,08:00:00,4u6g,1,,0,0,0.0000000,
5+
frequency-trip,08:01:00,08:01:00,johv,2,,0,0,341.4491961,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
2+
4u6g,,Butler Ln,,37.0612132,-122.0074332,,,0,,,
3+
johv,,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,,
4+
123,,Parent Station,,37.0666,-122.0777,,,1,,,
5+
1234,,Child Stop,,37.06662,-122.07772,,,0,123,,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from_stop_id,to_stop_id,transfer_type,min_transfer_time
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id
2+
1,a30277f8-e50a-4a85-9141-b1e0da9d429d,,,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57
3+
1,frequency-trip,,,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57

0 commit comments

Comments
 (0)