11/*
22 * This file is part of the Heritrix web crawler (crawler.archive.org).
33 *
4- * Licensed to the Internet Archive (IA) by one or more individual
5- * contributors.
4+ * Licensed to the Internet Archive (IA) by one or more individual
5+ * contributors.
66 *
77 * The IA licenses this file to You under the Apache License, Version 2.0
88 * (the "License"); you may not use this file except in compliance with
1818 */
1919package org .archive .crawler .frontier ;
2020
21- import java .util .logging .Logger ;
22-
23- import org .archive .url .URIException ;
21+ import com .sleepycat .je .Database ;
22+ import com .sleepycat .je .DatabaseException ;
23+ import com .sleepycat .je .EnvironmentConfig ;
24+ import com .sleepycat .je .tree .Key ;
25+ import org .apache .commons .io .FileUtils ;
26+ import org .archive .bdb .BdbModule ;
27+ import org .archive .bdb .StoredQueue ;
2428import org .archive .modules .CrawlURI ;
2529import org .archive .modules .SchedulingConstants ;
30+ import org .archive .net .UURI ;
2631import org .archive .net .UURIFactory ;
27-
28- import com .sleepycat .je .tree .Key ;
29-
32+ import org .archive .url .URIException ;
33+ import org .archive .util .Recorder ;
34+ import org .archive .util .bdbje .EnhancedEnvironment ;
35+ import org .junit .jupiter .api .AfterEach ;
36+ import org .junit .jupiter .api .BeforeEach ;
3037import org .junit .jupiter .api .Test ;
38+ import org .junit .jupiter .api .io .TempDir ;
39+
40+ import java .io .File ;
41+ import java .io .IOException ;
42+ import java .nio .file .Path ;
43+ import java .util .logging .Logger ;
3144
45+ import static org .junit .jupiter .api .Assertions .assertNull ;
3246import static org .junit .jupiter .api .Assertions .assertTrue ;
3347
3448/**
35- * Unit tests for BdbMultipleWorkQueues functionality.
36- *
49+ * Unit tests for BdbMultipleWorkQueues functionality.
50+ *
3751 * @author gojomo
3852 */
3953public class BdbMultipleWorkQueuesTest {
4054 private static Logger logger =
4155 Logger .getLogger (BdbMultipleWorkQueuesTest .class .getName ());
56+ @ TempDir
57+ Path tempDir ;
58+ @ TempDir
59+ Path curiTempDir ;
60+
61+ private BdbMultipleWorkQueues pendingUris = null ;
62+ private EnhancedEnvironment env ;
63+ private Database db ;
64+ private File envDir ;
65+
66+ protected Recorder getRecorder () throws IOException {
67+ if (Recorder .getHttpRecorder () == null ) {
68+ Recorder httpRecorder = new Recorder (curiTempDir .toFile (),
69+ getClass ().getName (), 16 * 1024 , 512 * 1024 );
70+ Recorder .setHttpRecorder (httpRecorder );
71+ }
72+
73+ return Recorder .getHttpRecorder ();
74+ }
75+
76+ protected CrawlURI makeCrawlURI (String uri ) throws URIException ,
77+ IOException {
78+ UURI uuri = UURIFactory .getInstance (uri );
79+ CrawlURI curi = new CrawlURI (uuri );
80+ curi .setClassKey ("key" );
81+ curi .setSeed (true );
82+ curi .setRecorder (getRecorder ());
83+ return curi ;
84+ }
85+
86+ @ BeforeEach
87+ protected void setUp () throws Exception {
88+ this .envDir = new File (tempDir .toFile (),"BdbMultipleWorkQueuesTest" );
89+ org .archive .util .FileUtils .ensureWriteableDirectory (this .envDir );
90+ try {
91+ EnvironmentConfig envConfig = new EnvironmentConfig ();
92+ envConfig .setTransactional (false );
93+ envConfig .setAllowCreate (true );
94+ env = new EnhancedEnvironment (envDir ,envConfig );
95+ BdbModule .BdbConfig dbConfig = StoredQueue .databaseConfig ();
96+ db = env .openDatabase (null , "StoredMapTest" , dbConfig .toDatabaseConfig ());
97+ } catch (DatabaseException e ) {
98+ throw new RuntimeException (e );
99+ }
100+ this .pendingUris = new BdbMultipleWorkQueues (db , env .getClassCatalog ());
101+
102+ }
103+
104+ @ AfterEach
105+ protected void tearDown () throws Exception {
106+ if (this .pendingUris !=null )
107+ this .pendingUris .close ();
108+ if (this .envDir .exists ()) {
109+ FileUtils .deleteDirectory (this .envDir );
110+ }
111+ }
42112
43-
44113 /**
45114 * Basic sanity checks for calculateInsertKey() -- ensure ordinal, cost,
46115 * and schedulingDirective have the intended effects, for ordinal values
@@ -53,38 +122,38 @@ public void testCalculateInsertKey() throws URIException {
53122 }
54123
55124 for (long ordinalOrigin = 1 ; ordinalOrigin < Long .MAX_VALUE / 4 ; ordinalOrigin <<= 1 ) {
56- CrawlURI curi1 =
125+ CrawlURI curi1 =
57126 new CrawlURI (UURIFactory .getInstance ("http://archive.org/foo" ));
58127 curi1 .setOrdinal (ordinalOrigin );
59128 curi1 .setClassKey ("foo" );
60- byte [] key1 =
129+ byte [] key1 =
61130 BdbMultipleWorkQueues .calculateInsertKey (curi1 ).getData ();
62- CrawlURI curi2 =
131+ CrawlURI curi2 =
63132 new CrawlURI (UURIFactory .getInstance ("http://archive.org/bar" ));
64133 curi2 .setOrdinal (ordinalOrigin + 1 );
65134 curi2 .setClassKey ("foo" );
66- byte [] key2 =
135+ byte [] key2 =
67136 BdbMultipleWorkQueues .calculateInsertKey (curi2 ).getData ();
68- CrawlURI curi3 =
137+ CrawlURI curi3 =
69138 new CrawlURI (UURIFactory .getInstance ("http://archive.org/baz" ));
70139 curi3 .setOrdinal (ordinalOrigin + 2 );
71140 curi3 .setClassKey ("foo" );
72141 curi3 .setSchedulingDirective (SchedulingConstants .HIGH );
73- byte [] key3 =
142+ byte [] key3 =
74143 BdbMultipleWorkQueues .calculateInsertKey (curi3 ).getData ();
75- CrawlURI curi4 =
144+ CrawlURI curi4 =
76145 new CrawlURI (UURIFactory .getInstance ("http://archive.org/zle" ));
77146 curi4 .setOrdinal (ordinalOrigin + 3 );
78147 curi4 .setClassKey ("foo" );
79148 curi4 .setPrecedence (2 );
80- byte [] key4 =
149+ byte [] key4 =
81150 BdbMultipleWorkQueues .calculateInsertKey (curi4 ).getData ();
82- CrawlURI curi5 =
151+ CrawlURI curi5 =
83152 new CrawlURI (UURIFactory .getInstance ("http://archive.org/gru" ));
84153 curi5 .setOrdinal (ordinalOrigin + 4 );
85154 curi5 .setClassKey ("foo" );
86155 curi5 .setPrecedence (1 );
87- byte [] key5 =
156+ byte [] key5 =
88157 BdbMultipleWorkQueues .calculateInsertKey (curi5 ).getData ();
89158 // ensure that key1 (with lower ordinal) sorts before key2 (higher
90159 // ordinal)
@@ -101,4 +170,42 @@ public void testCalculateInsertKey() throws URIException {
101170 "lower cost sorting first (" + ordinalOrigin + ")" );
102171 }
103172 }
173+
174+ @ Test
175+ public void testThreadInterrupt () throws InterruptedException , IOException {
176+ MockToeThread mockToeThread = new MockToeThread (this .pendingUris , makeCrawlURI ("http://www.archive.org" ));
177+
178+ mockToeThread .start ();
179+
180+ while (mockToeThread .isAlive ()) {
181+ Thread .sleep (100 );
182+ }
183+ mockToeThread .join ();
184+ assertNull (mockToeThread .thrownException );
185+
186+ }
187+ class MockToeThread extends Thread {
188+ BdbMultipleWorkQueues pendingUris ;
189+ CrawlURI curi ;
190+ Exception thrownException ;
191+ public MockToeThread (BdbMultipleWorkQueues pendingUris , CrawlURI curi ) {
192+ this .pendingUris = pendingUris ;
193+ this .curi = curi ;
194+ this .thrownException = null ;
195+ }
196+ @ Override
197+ public void run () {
198+ this .pendingUris .put (this .curi , true );
199+
200+ Thread .currentThread ().interrupt ();
201+ try {
202+ this .pendingUris .put (this .curi , true );
203+ }
204+ catch (com .sleepycat .je .EnvironmentFailureException ex ) {
205+ this .thrownException = ex ;
206+ }
207+
208+ }
209+ }
210+
104211}
0 commit comments