83 lines
2.6 KiB
TypeScript
83 lines
2.6 KiB
TypeScript
|
import { ArsTechnicaHandler } from '../src/websites/ars-technica-handler'
|
|||
|
import fs from 'fs'
|
|||
|
import nock from 'nock'
|
|||
|
import { expect } from 'chai'
|
|||
|
import { parseHTML } from 'linkedom'
|
|||
|
|
|||
|
/**
 * Exercises `ArsTechnicaHandler.preHandle` against a three-page article
 * fixture stored under `test/data/ars-multipage`.
 *
 * Every request to the three article URLs is intercepted by nock and answered
 * with the matching local fixture, so the suite never touches the network.
 */
describe('Testing parsing multi-page articles from arstechnica.', () => {
  const ARTICLE_ORIGIN = 'https://arstechnica.com'
  const ARTICLE_URL = `${ARTICLE_ORIGIN}/article/`
  const FIXTURE_DIR = './test/data/ars-multipage'

  // Assigned once in `before`; Mocha runs that hook before any test body, so
  // the tests can treat these as always defined.
  let originalArticle: Document
  let htmlPg1: string
  let htmlPg2: string
  let htmlPg3: string

  /** Reads a fixture file from disk as UTF-8 text. */
  const load = (path: string): string => fs.readFileSync(path, 'utf8')

  before(() => {
    htmlPg1 = load(`${FIXTURE_DIR}/ars-technica-page-1.html`)
    htmlPg2 = load(`${FIXTURE_DIR}/ars-technica-page-2.html`)
    htmlPg3 = load(`${FIXTURE_DIR}/ars-technica-page-3.html`)

    // Untouched parse of page 1, used as the "before" baseline in assertions.
    originalArticle = parseHTML(htmlPg1).document
  })

  beforeEach(() => {
    // Interceptors are re-registered for every test because `afterEach`
    // clears them all.
    nock(ARTICLE_ORIGIN).get('/article/').reply(200, htmlPg1)
    nock(ARTICLE_ORIGIN).get('/article/2/').reply(200, htmlPg2)
    nock(ARTICLE_ORIGIN).get('/article/3/').reply(200, htmlPg3)
  })

  afterEach(() => {
    nock.cleanAll()
  })

  it('should parse the title of the ars technica article.', async () => {
    const response = await new ArsTechnicaHandler().preHandle(ARTICLE_URL)

    // We grab the title from the document. `to.equal` also fails on
    // `undefined`, so no separate defined-ness check is needed.
    expect(response.title).to.equal(
      'What’s going on with the reports of a room-temperature superconductor? | Ars Technica'
    )
  })

  it('should remove the navigation links', async () => {
    const response = await new ArsTechnicaHandler().preHandle(ARTICLE_URL)

    // The pagination nav is present in the raw fixture...
    expect(originalArticle.querySelector('nav.page-numbers')).not.to.be.null
    // ...and must be gone from the handled DOM. A missing `dom` yields
    // `undefined`, which also fails this assertion.
    expect(response.dom?.querySelectorAll('nav.page-numbers').length).to.equal(
      0
    )
  })

  it('should append all new content into the main article', async () => {
    const response = await new ArsTechnicaHandler().preHandle(ARTICLE_URL)

    // We name the div to ensure we can validate that it has been inserted.
    expect(
      originalArticle.getElementsByClassName('nextPageContents').length
    ).to.equal(0)
    expect(
      response.dom?.getElementsByClassName('nextPageContents')?.length ?? 0
    ).not.to.equal(0)
  })

  it('should remove any related content links.', async () => {
    const response = await new ArsTechnicaHandler().preHandle(ARTICLE_URL)

    // This exists in the HTML, but we remove it when preparsing.
    expect(
      response.dom?.getElementsByClassName(
        'ArticleRelatedContentModule_root__BBa6g'
      ).length
    ).to.eql(0)
  })
})
|