Skip to content

Commit 29054df

Browse files
committed
fix: use example sample for demo
1 parent 6bcd82d commit 29054df

File tree

2 files changed: +32 -3 lines changed

glint/src/main/java/co/clflushopt/glint/App.java

+32 -2
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,14 @@
11
package co.clflushopt.glint;
22

33
import java.io.FileNotFoundException;
4+
import java.util.Arrays;
45
import java.util.Iterator;
6+
import java.util.List;
7+
import java.util.Optional;
58

69
import org.apache.arrow.vector.types.pojo.ArrowType;
710

11+
import co.clflushopt.glint.core.CsvReaderOptions;
812
import co.clflushopt.glint.core.ExecutionContext;
913
import co.clflushopt.glint.dataframe.DataFrame;
1014
import co.clflushopt.glint.query.logical.expr.AggregateExpr;
@@ -13,7 +17,10 @@
1317
import co.clflushopt.glint.query.logical.expr.LogicalExpr;
1418
import co.clflushopt.glint.query.logical.plan.LogicalPlan;
1519
import co.clflushopt.glint.query.optimizer.QueryOptimizer;
20+
import co.clflushopt.glint.types.ArrowTypes;
21+
import co.clflushopt.glint.types.Field;
1622
import co.clflushopt.glint.types.RecordBatch;
23+
import co.clflushopt.glint.types.Schema;
1724

1825
/**
1926
* Hello world!
@@ -35,9 +42,32 @@ public static void nycTripsBenchmark(String[] args) throws FileNotFoundException
3542

3643
long startTime = System.currentTimeMillis();
3744
try {
38-
45+
// Define the schema for NYC Taxi dataset
46+
Schema schema = new Schema(Arrays.asList(new Field("VendorID", ArrowTypes.Int32Type),
47+
new Field("tpep_pickup_datetime", ArrowTypes.StringType), // Could be Timestamp
48+
new Field("tpep_dropoff_datetime", ArrowTypes.StringType), // Could be Timestamp
49+
new Field("passenger_count", ArrowTypes.Int32Type),
50+
new Field("trip_distance", ArrowTypes.DoubleType),
51+
new Field("pickup_longitude", ArrowTypes.DoubleType),
52+
new Field("pickup_latitude", ArrowTypes.DoubleType),
53+
new Field("RatecodeID", ArrowTypes.Int32Type),
54+
new Field("store_and_fwd_flag", ArrowTypes.StringType),
55+
new Field("dropoff_longitude", ArrowTypes.DoubleType),
56+
new Field("dropoff_latitude", ArrowTypes.DoubleType),
57+
new Field("payment_type", ArrowTypes.Int32Type),
58+
new Field("fare_amount", ArrowTypes.DoubleType),
59+
new Field("extra", ArrowTypes.DoubleType),
60+
new Field("mta_tax", ArrowTypes.DoubleType),
61+
new Field("tip_amount", ArrowTypes.DoubleType),
62+
new Field("tolls_amount", ArrowTypes.DoubleType),
63+
new Field("improvement_surcharge", ArrowTypes.DoubleType),
64+
new Field("total_amount", ArrowTypes.DoubleType)));
3965
// Create DataFrame and apply transformations
40-
DataFrame df = ctx.readParquet("./datasets/yellow_tripdata_2019-01.parquet", null);
66+
DataFrame df = ctx
67+
.readCsv("./datasets/yellow_tripdata_example.csv", Optional.of(schema),
68+
CsvReaderOptions.builder().delimiter(',').hasHeader(true).build())
69+
.aggregate(List.of(col("passenger_count")),
70+
List.of(max(cast(col("fare_amount"), ArrowTypes.FloatType))));
4171

4272
System.out.println("Logical Plan:\t" + LogicalPlan.format(df.getLogicalPlan()));
4373
System.out.println("Schema:\t" + df.getSchema());

glint/src/main/java/co/clflushopt/glint/query/physical/plan/HashAggregateOperator.java

-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,6 @@ public Iterator<RecordBatch> execute() {
6060
Iterator<RecordBatch> inputIter = input.execute();
6161
while (inputIter.hasNext()) {
6262
RecordBatch batch = inputIter.next();
63-
6463
// Evaluate grouping expressions
6564
List<ColumnVector> groupKeys = groupByExpr.stream().map(expr -> expr.eval(batch))
6665
.collect(Collectors.toList());

0 commit comments

Comments
 (0)