worker-planet/index.js

216 lines
5.9 KiB
JavaScript
Raw Normal View History

2021-04-25 21:25:37 +02:00
import Parser from 'rss-parser'
import { Feed } from 'feed'
import Handlebars from 'handlebars/runtime'
import template from './templates/default.precompiled'
2023-07-28 20:10:22 +02:00
import * as striptags from 'striptags'
2021-04-25 21:25:37 +02:00
2023-07-28 20:10:22 +02:00
/**
 * Extra Handlebars template helpers
 *
 * `isRowElemN` is a block helper: it renders its main block when
 * `index % rowItems` equals `n`, and its inverse block otherwise.
 */
Handlebars.registerHelper('isRowElemN', function (position, perRow, target, block) {
  // A regular function (not an arrow) is required because `this` is forwarded
  // to the block renderers. Loose equality is kept so the comparison behaves
  // exactly as before.
  if (position % perRow == target) {
    return block.fn(this)
  }
  return block.inverse(this)
})
/**
 * Handle CRON jobs.
 * The scheduled run is where the information is gathered and the HTML and
 * feed documents are generated.
 */
addEventListener('scheduled', function onScheduled(event) {
  // Hand the aggregation promise to the runtime through waitUntil.
  const job = handleScheduled()
  event.waitUntil(job)
})
2023-07-28 20:10:22 +02:00
/**
 * Serve the previously generated documents.
 * handleRequest consults the cache to speed up the operation.
 */
addEventListener('fetch', function onFetch(event) {
  const reply = handleRequest(event.request)
  event.respondWith(reply)
})
2021-04-25 21:25:37 +02:00
/**
 * Deliver aggregated content according to the format requested.
 *
 * Routes: `/` (HTML), `/rss` (RSS) and `/atom` (Atom); any other path gets a
 * 404. Successful responses are looked up in, and stored to, `caches.default`.
 * When the requested document has not been generated yet, a 503 is returned
 * and nothing is cached.
 *
 * @param {Request} request
 * @returns {Promise<Response>}
 */
async function handleRequest(request) {
  const url = new URL(request.url)
  const cacheKey = new Request(url.toString(), request)
  const cache = caches.default
  // `typeof` guard: an unset CACHE_MAX_AGE binding must not throw a
  // ReferenceError; fall back to one hour.
  const cacheMaxAge =
    (typeof CACHE_MAX_AGE !== 'undefined' && CACHE_MAX_AGE) || 3600

  const cached = await cache.match(cacheKey)
  if (cached) return cached

  // pathname -> [key in WORKER_PLANET_STORE, content-type header]
  const routes = new Map([
    ['/', ['html', 'text/html;charset=UTF-8']],
    ['/rss', ['rss', 'application/rss+xml']],
    ['/atom', ['atom', 'application/atom+xml']],
  ])
  const route = routes.get(url.pathname)
  if (!route) {
    return new Response('', { status: 404 })
  }

  const [storeKey, contentType] = route
  const content = await WORKER_PLANET_STORE.get(storeKey)
  if (content == null) {
    // Nothing has been generated yet. Do not cache this answer, otherwise an
    // empty page would keep being served for `cacheMaxAge` seconds.
    return new Response('Content has not been generated yet', {
      status: 503,
      headers: {
        'content-type': 'text/plain;charset=UTF-8',
        'Cache-Control': 'no-store',
      },
    })
  }

  const response = new Response(content, {
    headers: {
      'content-type': contentType,
      'Cache-Control': `max-age=${cacheMaxAge}`,
    },
  })
  // The Workers Cache API only accepts GET requests in put().
  if (request.method === 'GET') {
    await cache.put(cacheKey, response.clone())
  }
  return response
}
/**
 * Fetch all source feeds and generate the aggregated content.
 *
 * Reads the comma-separated FEEDS list, fetches every feed in parallel,
 * merges the posts (most recent first, capped at MAX_SIZE) and stores the
 * generated `rss`, `atom` and `html` documents in WORKER_PLANET_STORE.
 * A feed that fails to load, or that has no items, is logged and skipped.
 *
 * @returns {Promise<void>}
 */
async function handleScheduled() {
  // Tolerate spaces and stray commas in the FEEDS list.
  const feeds = FEEDS.split(',')
    .map(url => url.trim())
    .filter(url => url !== '')
  const results = await Promise.allSettled(
    feeds.map(url => fetchAndHydrate(url)),
  )

  const sources = []
  let content = []
  for (const [index, result] of results.entries()) {
    if (result.status !== 'fulfilled') {
      console.log(`Failed to fetch ${feeds[index]}`)
      console.log(result.reason)
      continue
    }
    const posts = result.value
    if (!posts || posts.length === 0) {
      // An empty feed has no first item to read the source details from.
      console.log(`No items found in ${feeds[index]}`)
      continue
    }
    const { source_title: title, source_link: declaredLink } = posts[0]
    // Feeds without a <link> fall back to the feed URL itself.
    const link = declaredLink || feeds[index]
    const name = title || new URL(link).host
    sources.push({ name, link })
    content.push(...posts)
  }

  // Sort all the elements chronologically (recent first). Items without a
  // parsable date are treated as the oldest.
  const timestamp = item => {
    const ms = Date.parse(item.isoDate)
    return Number.isNaN(ms) ? 0 : ms
  }
  content.sort((a, b) => timestamp(b) - timestamp(a))

  // MAX_SIZE may arrive as a string binding; compare numerically.
  const limit = Number(MAX_SIZE)
  if (content.length > limit) {
    content = content.slice(0, limit)
  }

  // Generate the documents (feed first, then HTML).
  const feed = createFeed(content)
  const html = createHTML(content, sources)

  // Store: the three writes are independent of each other.
  await Promise.all([
    WORKER_PLANET_STORE.put('rss', feed.rss2()),
    WORKER_PLANET_STORE.put('atom', feed.atom1()),
    WORKER_PLANET_STORE.put('html', html),
  ])
}
/**
 * Take a feed URL, fetch all items and attach source information.
 *
 * Every item gets `source_title` and `source_link` copied from the parsed
 * feed, and its `content` is replaced by `content:encoded` when the item has
 * that field.
 *
 * @param {String} feed The URL of the feed to be fetched and parsed
 * @returns {Promise<Array>} All the feed items parsed by rss-parser
 * @throws {Error} When the server answers with a non-2xx status
 */
async function fetchAndHydrate(feed) {
  console.log(`[fetchAndHydrate] start to fetch feed: ${feed}`)
  const resp = await fetch(feed)
  console.log(`[fetchAndHydrate] response: ${resp.status}`)
  if (!resp.ok) {
    // Fail with a clear reason instead of handing an error page to the parser.
    throw new Error(
      `[fetchAndHydrate] ${feed} answered with HTTP ${resp.status}`,
    )
  }

  const parser = new Parser()
  const contentFeed = await parser.parseString(await resp.text())
  const items = contentFeed.items ?? []
  for (const item of items) {
    item.source_title = contentFeed.title
    item.source_link = contentFeed.link
    if ('content:encoded' in item) {
      item.content = item['content:encoded']
    }
  }

  console.log(
    `[fetchAndHydrate] Finished fetch feed: ${feed}. ${items.length} items gathered`,
  )
  return items
}
/**
 * Builds a feed object from the provided items.
 *
 * The feed header is filled from the TITLE, DESCRIPTION and CUSTOM_URL
 * bindings; each item becomes one entry.
 *
 * @param {Array} items parsed by rss-parser
 * @return Feed object created by feed
 */
function createFeed(items) {
  console.log(`[createFeed] start building the aggregated feed`)
  const feed = new Feed({
    title: TITLE,
    description: DESCRIPTION,
    id: CUSTOM_URL,
    link: CUSTOM_URL,
  })

  for (const item of items) {
    // An Invalid Date must not reach the feed serializers, so fall back to
    // the epoch when no usable date is available.
    const parsedDate = new Date(item.isoDate ?? item.pubDate)
    const date = Number.isNaN(parsedDate.getTime()) ? new Date(0) : parsedDate

    feed.addItem({
      title: item.title,
      // Items may carry `id` instead of `guid`; the link is the last resort.
      id: item.guid ?? item.id ?? item.link,
      link: item.link,
      description: item.contentSnippet,
      content: item.content,
      author: [
        {
          // Items may carry `author` instead of `creator`; otherwise credit
          // the source feed.
          name: item.creator ?? item.author ?? item.source_title,
          email: '',
          link: item.source_link,
        },
      ],
      contributor: [],
      date,
    })
  }

  console.log(`[createFeed] Finished building the aggregated feed`)
  return feed
}
/**
 * Generate the HTML page with the aggregated contents.
 *
 * Each item handed to the template is a copy of the original extended with
 * `description` (tag-stripped content, at most 250 characters, followed by
 * ' [...]') and `formattedDate` (pt-PT formatted, UTC). The input items are
 * left untouched.
 *
 * @param {Array} items parsed by rss-parser
 * @param {Array} sources `{ name, link }` entries describing the source feeds
 * @returns String with HTML page containing the parsed contents
 */
function createHTML(items, sources) {
  console.log(`[createHTML] building the HTML document`)
  // Named `render` so it does not shadow the module-level `template` import.
  const render = Handlebars.templates['default']
  const dateFormatter = new Intl.DateTimeFormat('pt-PT', { timeZone: 'UTC' })

  const viewItems = items.map(item => {
    // Items without content get an empty description instead of passing
    // `undefined` to striptags.
    const shortdescription = striptags(item.content ?? '').substring(0, 250)

    // Intl.DateTimeFormat#format throws a RangeError on an Invalid Date, so
    // an unparsable pubDate is rendered as an empty string.
    const published = item.pubDate ? new Date(item.pubDate) : null
    const hasValidDate =
      published !== null && !Number.isNaN(published.getTime())

    return {
      ...item,
      description: shortdescription ? `${shortdescription} [...]` : '',
      formattedDate: hasValidDate ? dateFormatter.format(published) : '',
    }
  })

  return render({
    items: viewItems,
    sources: sources,
    page_title: TITLE,
    page_description: DESCRIPTION,
  })
}