Skip to content

Commit 588642b

Browse files
authored
Merge pull request #97 from ruguoapp/span
Replace text node followed by br with a p tag
2 parents e513e9c + 4be4e45 commit 588642b

File tree

3 files changed

+4256
-5
lines changed

3 files changed

+4256
-5
lines changed

src/helpers.js

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,20 @@ var grabArticle = module.exports.grabArticle = function(document, preserveUnlike
116116
// EXPERIMENTAL
117117
Array.prototype.slice.call(node.childNodes).forEach(function(childNode) {
118118
if (childNode.nodeType == 3 /*TEXT_NODE*/ ) {
119-
// use span instead of p. Need more tests.
120-
dbg("replacing text node with a span tag with the same content.");
121-
var span = document.createElement('span');
122-
span.innerHTML = childNode.nodeValue;
123-
childNode.parentNode.replaceChild(span, childNode);
119+
var nextSibling = childNode.nextSibling
120+
if (nextSibling && nextSibling.tagName == 'BR') {
121+
dbg("replacing text node followed by br with a p tag with the same content.");
122+
var p = document.createElement('p');
123+
p.innerHTML = childNode.nodeValue;
124+
childNode.parentNode.removeChild(nextSibling)
125+
childNode.parentNode.replaceChild(p, childNode);
126+
} else {
127+
// use span instead of p. Need more tests.
128+
dbg("replacing text node with a span tag with the same content.");
129+
var span = document.createElement('span');
130+
span.innerHTML = childNode.nodeValue;
131+
childNode.parentNode.replaceChild(span, childNode);
132+
}
124133
}
125134
});
126135
}

test/article-tests.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,20 @@ describe('Regression Tests', function() {
6767
'ライフハック',
6868
'個人情報の取り扱いについて',
6969
],
70+
},
71+
{
72+
fixture: 'douban-group-topic',
73+
title: '半年面试了上百人,经验总结。。',
74+
include: [
75+
'看到组里很多初出社会的小伙伴愁工作的事,我想给大家讲一讲个人的经验,希望尽量给大家一点帮助,少走一点弯路',
76+
'其他就不一一列举了,重点是展示出【高匹配度】',
77+
'最近工作遇到瓶颈,毕竟不会一直一帆风顺,调整好了之后会继续分享经验的,谢谢大家这么久的关注。',
78+
],
79+
notInclude: [
80+
'最赞回应',
81+
'最新话题',
82+
'北京豆网科技有限公司',
83+
]
7084
}].forEach(function(testCase) {
7185
it('can extract ' + testCase.fixture + ' articles', function(done) {
7286
var html = fs.readFileSync(articleFixtures + '/' + testCase.fixture + '.html').toString();

0 commit comments

Comments
 (0)