// shibboleth: reinterprets a "binary string" (each character expected to
// hold one byte) as UTF-8 and returns the decoded text.
// escape() turns the non-ASCII bytes into %XX sequences, and
// decodeURIComponent() then reads those sequences as UTF-8.
// Throws a URIError when the bytes are not well-formed UTF-8.
// https://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
function decodeUtf8(str) {
  const percentEncoded = escape(str)
  return decodeURIComponent(percentEncoded)
}
// Decodes a string containing HTML entities.
// The string is parsed as a 'text/html' document and the text content of
// that document's root element is returned.
function decodeHtml(str) {
  const parser = new DOMParser()
  const { documentElement } = parser.parseFromString(str, 'text/html')
  return documentElement.textContent
}
// Decodes a "protected" "email".
// `data` is read as a run of two-character hex pairs: the first pair is the
// XOR key, every following pair is one payload byte XORed with that key.
// The recovered bytes are then decoded as UTF-8 and entity-decoded.
function decode(data) {
  const octets = data.match(/.{2}/g).map(pair => parseInt(pair, 16))
  const key = octets[0]
  let binary = ""
  for (let i = 1; i < octets.length; i++) {
    binary += String.fromCharCode(octets[i] ^ key)
  }
  // the proprietary code decodes entities too (reason unclear); kept as-is on purpose
  return decodeHtml(decodeUtf8(binary))
}
// Processes a document (or document fragment) for "protected" "emails",
// rewriting them in place with the decoded address.
// Nodes that cannot be protected emails (anchors without a usable href,
// `.__cf_email__` elements without a payload) are left untouched instead of
// aborting the whole pass. Errors thrown by decode() still propagate.
function process(root) {
  // mailto links
  // format: an <a> whose URL path is /cdn-cgi/l/email-protection, with the
  // hex-encoded address in the URL fragment
  for (const node of root.querySelectorAll('a')) {
    let url
    try {
      url = new URL(node.href)
    } catch (err) {
      // an anchor without an href reports "", and an unparseable href is
      // reported verbatim; `new URL` rejects both, so this isn't one of ours
      continue
    }
    if (url.pathname === '/cdn-cgi/l/email-protection' && url.hash !== '')
      node.href = `mailto:${decode(url.hash.slice(1))}`
  }
  // everything else Cloudflare thinks is an email
  // format: an element with class __cf_email__ that carries the hex-encoded
  // address in its data-cfemail attribute
  for (const node of root.querySelectorAll('.__cf_email__')) {
    const payload = node.getAttribute('data-cfemail')
    // no attribute, nothing to decode: leave the element as it is
    if (payload === null) continue
    node.replaceWith(decode(payload))
  }
  // <template> contents are not considered children of the element itself, so we
  // need to recurse into them. I highly doubt this will ever come up in the wild,
  // but since the proprietary code takes care of this edge-case, so should we.
  for (const node of root.querySelectorAll('template'))
    process(node.content)
}
// check for the presence of the Cloudflare deobfuscation script.
// it doesn't make sense to run our stuff if this isn't in the page
// format: a <script> whose src ends in /cloudflare-static/email-decode.min.js
// (per the original note, a hexadecimal part of the path can change, hence
// the match on the suffix only)
const scripts = document.querySelectorAll('script[src$="/cloudflare-static/email-decode.min.js"]')
// querySelectorAll never returns null: "no match" is an empty list, so the
// list has to be tested for emptiness
if (scripts.length > 0) {
  // remove the matched script elements, then decode the page ourselves
  for (const script of scripts) script.remove()
  process(document)
}