2626import com .google .cloud .dataflow .sdk .coders .KvCoder ;
2727import com .google .cloud .dataflow .sdk .coders .MapCoder ;
2828import com .google .cloud .dataflow .sdk .coders .StringUtf8Coder ;
29+ import com .google .cloud .dataflow .sdk .options .DataflowPipelineOptions ;
30+ import com .google .cloud .dataflow .sdk .options .DirectPipelineOptions ;
31+ import com .google .cloud .dataflow .sdk .options .PipelineOptionsFactory ;
32+ import com .google .cloud .dataflow .sdk .runners .DataflowPipelineRunner ;
33+ import com .google .cloud .dataflow .sdk .runners .DirectPipelineRunner ;
2934import com .google .cloud .dataflow .sdk .testing .DataflowAssert ;
3035import com .google .cloud .dataflow .sdk .testing .RunnableOnService ;
3136import com .google .cloud .dataflow .sdk .testing .TestPipeline ;
3237import com .google .cloud .dataflow .sdk .transforms .windowing .FixedWindows ;
3338import com .google .cloud .dataflow .sdk .transforms .windowing .InvalidWindows ;
3439import com .google .cloud .dataflow .sdk .transforms .windowing .Sessions ;
3540import com .google .cloud .dataflow .sdk .transforms .windowing .Window ;
41+ import com .google .cloud .dataflow .sdk .util .NoopPathValidator ;
42+ import com .google .cloud .dataflow .sdk .util .WindowingStrategy ;
3643import com .google .cloud .dataflow .sdk .values .KV ;
44+ import com .google .cloud .dataflow .sdk .values .PBegin ;
3745import com .google .cloud .dataflow .sdk .values .PCollection ;
3846
3947import org .joda .time .Duration ;
5765public class GroupByKeyTest {
5866
5967 @ Rule
60- public ExpectedException expectedEx = ExpectedException .none ();
68+ public ExpectedException thrown = ExpectedException .none ();
6169
6270 @ Test
6371 @ Category (RunnableOnService .class )
@@ -167,8 +175,6 @@ public void testGroupByKeyEmpty() {
167175
168176 @ Test
169177 public void testGroupByKeyNonDeterministic () throws Exception {
170- expectedEx .expect (IllegalStateException .class );
171- expectedEx .expectMessage ("must be deterministic" );
172178
173179 List <KV <Map <String , String >, Integer >> ungroupedPairs = Arrays .asList ();
174180
@@ -180,9 +186,9 @@ public void testGroupByKeyNonDeterministic() throws Exception {
180186 KvCoder .of (MapCoder .of (StringUtf8Coder .of (), StringUtf8Coder .of ()),
181187 BigEndianIntegerCoder .of ())));
182188
189+ thrown .expect (IllegalStateException .class );
190+ thrown .expectMessage ("must be deterministic" );
183191 input .apply (GroupByKey .<Map <String , String >, Integer >create ());
184-
185- p .run ();
186192 }
187193
188194 @ Test
@@ -230,9 +236,30 @@ public void testWindowFnInvalidation() {
230236 Duration .standardMinutes (1 )))));
231237 }
232238
239+ /**
240+ * Create a test pipeline that uses the {@link DataflowPipelineRunner} so that {@link GroupByKey}
241+ * is not expanded. This is used for verifying that even without expansion the proper errors show
242+ * up.
243+ */
244+ private Pipeline createTestServiceRunner () {
245+ DataflowPipelineOptions options = PipelineOptionsFactory .as (DataflowPipelineOptions .class );
246+ options .setRunner (DataflowPipelineRunner .class );
247+ options .setProject ("someproject" );
248+ options .setStagingLocation ("gs://staging" );
249+ options .setPathValidatorClass (NoopPathValidator .class );
250+ options .setDataflowClient (null );
251+ return Pipeline .create (options );
252+ }
253+
254+ private Pipeline createTestDirectRunner () {
255+ DirectPipelineOptions options = PipelineOptionsFactory .as (DirectPipelineOptions .class );
256+ options .setRunner (DirectPipelineRunner .class );
257+ return Pipeline .create (options );
258+ }
259+
233260 @ Test
234- public void testInvalidWindows () {
235- Pipeline p = TestPipeline . create ();
261+ public void testInvalidWindowsDirect () {
262+ Pipeline p = createTestDirectRunner ();
236263
237264 List <KV <String , Integer >> ungroupedPairs = Arrays .asList ();
238265
@@ -242,15 +269,30 @@ public void testInvalidWindows() {
242269 .apply (Window .<KV <String , Integer >>into (
243270 Sessions .withGapDuration (Duration .standardMinutes (1 ))));
244271
245- try {
246- input
247- .apply ("GroupByKey" , GroupByKey .<String , Integer >create ())
248- .apply ("GroupByKeyAgain" , GroupByKey .<String , Iterable <Integer >>create ());
249- Assert .fail ("Exception should have been thrown" );
250- } catch (IllegalStateException e ) {
251- Assert .assertTrue (e .getMessage ().startsWith (
252- "GroupByKey must have a valid Window merge function." ));
253- }
272+ thrown .expect (IllegalStateException .class );
273+ thrown .expectMessage ("GroupByKey must have a valid Window merge function" );
274+ input
275+ .apply ("GroupByKey" , GroupByKey .<String , Integer >create ())
276+ .apply ("GroupByKeyAgain" , GroupByKey .<String , Iterable <Integer >>create ());
277+ }
278+
279+ @ Test
280+ public void testInvalidWindowsService () {
281+ Pipeline p = createTestServiceRunner ();
282+
283+ List <KV <String , Integer >> ungroupedPairs = Arrays .asList ();
284+
285+ PCollection <KV <String , Integer >> input =
286+ p .apply (Create .of (ungroupedPairs )
287+ .withCoder (KvCoder .of (StringUtf8Coder .of (), BigEndianIntegerCoder .of ())))
288+ .apply (Window .<KV <String , Integer >>into (
289+ Sessions .withGapDuration (Duration .standardMinutes (1 ))));
290+
291+ thrown .expect (IllegalStateException .class );
292+ thrown .expectMessage ("GroupByKey must have a valid Window merge function" );
293+ input
294+ .apply ("GroupByKey" , GroupByKey .<String , Integer >create ())
295+ .apply ("GroupByKeyAgain" , GroupByKey .<String , Iterable <Integer >>create ());
254296 }
255297
256298 @ Test
@@ -278,6 +320,48 @@ public void testRemerge() {
278320 Sessions .withGapDuration (Duration .standardMinutes (1 ))));
279321 }
280322
323+ @ Test
324+ public void testGroupByKeyDirectUnbounded () {
325+ Pipeline p = createTestDirectRunner ();
326+
327+ PCollection <KV <String , Integer >> input = p
328+ .apply (new PTransform <PBegin , PCollection <KV <String , Integer >>>() {
329+ @ Override
330+ public PCollection <KV <String , Integer >> apply (PBegin input ) {
331+ return PCollection .createPrimitiveOutputInternal (input .getPipeline (),
332+ WindowingStrategy .globalDefault (), PCollection .IsBounded .UNBOUNDED );
333+ }
334+ });
335+
336+ thrown .expect (IllegalStateException .class );
337+ thrown .expectMessage (
338+ "GroupByKey cannot be applied to non-bounded PCollection in the GlobalWindow without "
339+ + "a trigger. Use a Window.into or Window.triggering transform prior to GroupByKey." );
340+
341+ input .apply ("GroupByKey" , GroupByKey .<String , Integer >create ());
342+ }
343+
344+ @ Test
345+ public void testGroupByKeyServiceUnbounded () {
346+ Pipeline p = createTestServiceRunner ();
347+
348+ PCollection <KV <String , Integer >> input = p
349+ .apply (new PTransform <PBegin , PCollection <KV <String , Integer >>>() {
350+ @ Override
351+ public PCollection <KV <String , Integer >> apply (PBegin input ) {
352+ return PCollection .createPrimitiveOutputInternal (input .getPipeline (),
353+ WindowingStrategy .globalDefault (), PCollection .IsBounded .UNBOUNDED );
354+ }
355+ });
356+
357+ thrown .expect (IllegalStateException .class );
358+ thrown .expectMessage (
359+ "GroupByKey cannot be applied to non-bounded PCollection in the GlobalWindow without "
360+ + "a trigger. Use a Window.into or Window.triggering transform prior to GroupByKey." );
361+
362+ input .apply ("GroupByKey" , GroupByKey .<String , Integer >create ());
363+ }
364+
281365 @ Test
282366 public void testGroupByKeyGetName () {
283367 Assert .assertEquals ("GroupByKey" , GroupByKey .<String , Integer >create ().getName ());
0 commit comments