2121import org .schabi .newpipe .extractor .services .soundcloud .SoundcloudParsingHelper ;
2222
2323import java .io .IOException ;
24+ import java .util .ArrayList ;
25+ import java .util .List ;
2426
2527import javax .annotation .Nonnull ;
28+ import javax .annotation .Nullable ;
2629
2730public class SoundcloudCommentsExtractor extends CommentsExtractor {
2831 public static final String COLLECTION = "collection" ;
2932 public static final String NEXT_HREF = "next_href" ;
3033
34+ /**
35+ * The last top level comment of the most recently collected page.
36+ * The next page might start with replies to this comment,
37+ * and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
38+ * of this comment cannot be determined with certainty until that page has been collected.
39+ */
40+ @ Nullable private JsonObject lastTopLevelComment ;
41+
3142 public SoundcloudCommentsExtractor (final StreamingService service ,
3243 final ListLinkHandler uiHandler ) {
3344 super (service , uiHandler );
@@ -50,14 +61,15 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage() throws ExtractionExcepti
5061 final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector (
5162 getServiceId ());
5263
53- collectCommentsFrom (collector , json );
64+ collectCommentsFrom (collector , json , null );
5465
5566 return new InfoItemsPage <>(collector , new Page (json .getString (NEXT_HREF )));
5667 }
5768
5869 @ Override
59- public InfoItemsPage <CommentsInfoItem > getPage (final Page page ) throws ExtractionException ,
60- IOException {
70+ public InfoItemsPage <CommentsInfoItem > getPage (final Page page )
71+ throws ExtractionException , IOException {
72+
6173 if (page == null || isNullOrEmpty (page .getUrl ())) {
6274 throw new IllegalArgumentException ("Page doesn't contain an URL" );
6375 }
@@ -88,7 +100,7 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
88100 } catch (final JsonParserException e ) {
89101 throw new ParsingException ("Could not parse json" , e );
90102 }
91- collectCommentsFrom (collector , json );
103+ collectCommentsFrom (collector , json , lastTopLevelComment );
92104 }
93105
94106 if (hasNextPage ) {
@@ -101,27 +113,86 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
101113 @ Override
102114 public void onFetchPage (@ Nonnull final Downloader downloader ) { }
103115
104- private void collectCommentsFrom (final CommentsInfoItemsCollector collector ,
105- final JsonObject json ) throws ParsingException {
116+ /**
117+ * Collect top level comments from a SoundCloud API response.
118+ * @param collector the collector which collects the the top level comments
119+ * @param json the JsonObject of the API response
120+ * @param lastTopLevelComment the last top level comment from the previous page or {@code null}
121+ * if this method is run for the initial page.
122+ * @throws ParsingException
123+ */
124+ private void collectCommentsFrom (@ Nonnull final CommentsInfoItemsCollector collector ,
125+ @ Nonnull final JsonObject json ,
126+ @ Nullable final JsonObject lastTopLevelComment )
127+ throws ParsingException {
128+ final List <SoundcloudCommentsInfoItemExtractor > extractors = new ArrayList <>();
106129 final String url = getUrl ();
107130 final JsonArray entries = json .getArray (COLLECTION );
108- JsonObject lastTopComment = null ;
131+ /**
132+ * The current top level comment.
133+ */
134+ JsonObject currentTopLevelComment = null ;
135+ boolean isLastCommentReply = true ;
136+ // Check whether the first comment in the list is a reply to the last top level comment
137+ // from the previous page if there was a previous page.
138+ if (lastTopLevelComment != null ) {
139+ final JsonObject firstComment = entries .getObject (0 );
140+ if (SoundcloudParsingHelper .isReplyTo (lastTopLevelComment , firstComment )) {
141+ currentTopLevelComment = lastTopLevelComment ;
142+ } else {
143+ extractors .add (new SoundcloudCommentsInfoItemExtractor (
144+ json , SoundcloudCommentsInfoItemExtractor .PREVIOUS_PAGE_INDEX ,
145+ firstComment , url , null ));
146+ }
147+ }
148+
109149 for (int i = 0 ; i < entries .size (); i ++) {
110150 final JsonObject entry = entries .getObject (i );
111- if (i == 0
151+ // extract all top level comments
152+ // The first comment is either a top level comment
153+ // if it is not a reply to the last top level comment
154+ //
155+ if (i == 0 && currentTopLevelComment == null
112156 || (!SoundcloudParsingHelper .isReplyTo (entries .getObject (i - 1 ), entry )
113- && !SoundcloudParsingHelper .isReplyTo (lastTopComment , entry ))) {
114- lastTopComment = entry ;
115- collector .commit (new SoundcloudCommentsInfoItemExtractor (
116- json , i , entry , url ));
157+ && !SoundcloudParsingHelper .isReplyTo (currentTopLevelComment , entry ))) {
158+ currentTopLevelComment = entry ;
159+ if (i == entries .size () - 1 ) {
160+ isLastCommentReply = false ;
161+ this .lastTopLevelComment = currentTopLevelComment ;
162+ // Do not collect the last comment if it is a top level comment
163+ // because it might have replies.
164+ // That is information we cannot get from the comment itself
165+ // (thanks SoundCloud...) but needs to be obtained from the next comment.
166+ // The comment will therefore be collected
167+ // when collecting the items from the next page.
168+ break ;
169+ }
170+ extractors .add (new SoundcloudCommentsInfoItemExtractor (
171+ json , i , entry , url , lastTopLevelComment ));
117172 }
118173 }
174+ if (isLastCommentReply ) {
175+ // Do not collect the last top level comment if it has replies and the retrieved
176+ // comment list ends with a reply. We do not know whether the next page starts
177+ // with more replies to the last top level comment.
178+ this .lastTopLevelComment = extractors .remove (extractors .size () - 1 ).item ;
179+ }
180+ extractors .stream ().forEach (collector ::commit );
181+
119182 }
120183
121- private boolean collectRepliesFrom (final CommentsInfoItemsCollector collector ,
122- final JsonObject json ,
123- final int id ,
124- final String url ) {
184+ /**
185+ * Collect replies to a top level comment from a SoundCloud API response.
186+ * @param collector the collector which collects the replies
187+ * @param json the SoundCloud API response
188+ * @param id the comment's id for which the replies are collected
189+ * @param url the corresponding page's URL
190+ * @return
191+ */
192+ private boolean collectRepliesFrom (@ Nonnull final CommentsInfoItemsCollector collector ,
193+ @ Nonnull final JsonObject json ,
194+ final int id ,
195+ @ Nonnull final String url ) {
125196 JsonObject originalComment = null ;
126197 final JsonArray entries = json .getArray (COLLECTION );
127198 boolean moreReplies = false ;
0 commit comments