summary refs log tree commit diff
path: root/content.js
blob: 5f1d36aebd8f25cd32bd59aeb53dbcb8e433f867 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// Reinterprets a "binary string" (one char per byte, each <= 0xFF) as UTF-8.
// The classic shibboleth: escape() turns every byte into a %XX sequence, and
// decodeURIComponent() then reads those sequences back as UTF-8.
// Throws a URIError when the bytes are not valid UTF-8.
// https://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
function decodeUtf8(str) {
  const percentEncoded = escape(str)
  return decodeURIComponent(percentEncoded)
}

// Resolves HTML entities in a string by parsing it as an HTML document and
// reading back the text content of the resulting root element.
function decodeHtml(str) {
  const parser = new DOMParser()
  const { documentElement } = parser.parseFromString(str, 'text/html')
  return documentElement.textContent
}

// Decodes an obfuscated e-mail address.
// The payload is a run of two-digit hex bytes: the first byte is the XOR key,
// every following byte is one byte of the address XORed with that key.
// Throws a TypeError when the payload is null or shorter than one hex pair.
function decode(data) {
  const pairs = data.match(/.{2}/g)
  const key = parseInt(pairs[0], 16)

  let bytes = ''
  for (const pair of pairs.slice(1))
    bytes += String.fromCharCode(parseInt(pair, 16) ^ key)

  // the proprietary code also decodes HTML entities here; the reason is
  // unclear, but we mirror it
  return decodeHtml(decodeUtf8(bytes))
}

// Processes a document (or document fragment), replacing Cloudflare-obfuscated
// e-mail addresses with their decoded form.
//
// root: anything exposing querySelectorAll -- the document itself, or the
// content fragment of a <template>.
//
// Decoding is best-effort per element: a node whose payload cannot be decoded
// is left as-is and does not stop the remaining nodes from being processed.
function process(root) {
  // mailto links
  // format: <a href="/cdn-cgi/l/email-protection#{obfuscated data}">...</a>
  for (const node of root.querySelectorAll('a')) {
    try {
      const url = new URL(node.href)
      if (url.pathname === '/cdn-cgi/l/email-protection' && url.hash !== '')
        node.href = `mailto:${decode(url.hash.slice(1))}`
    } catch {
      // there wasn't an href, it wasn't a valid URL, or the payload in the
      // fragment could not be decoded
    }
  }

  // everything else Cloudflare thinks is an e-mail address
  // format: <a href="/cdn-cgi/l/email-protection" class="__cf_email__" data-cfemail="{obfuscated data}">[email&#160;protected]</a>
  for (const node of root.querySelectorAll('.__cf_email__')) {
    try {
      node.replaceWith(decode(node.getAttribute('data-cfemail')))
    } catch {
      // data-cfemail was missing or malformed; keep the placeholder and carry
      // on, so one bad element doesn't abort every replacement after it
    }
  }

  // <template> contents are not considered children of the element itself, so we
  // need to recurse into them. I highly doubt this will ever come up in the wild,
  // but since the proprietary code takes care of this edge-case, so should we.
  for (const node of root.querySelectorAll('template'))
    process(node.content)
}

// check for the presence of the Cloudflare deobfuscation script.
// it doesn't make sense to run our stuff if this isn't in the page
// format: <script data-cfasync="false" src="/cdn-cgi/scripts/71a88165/cloudflare-static/email-decode.min.js"></script>
// (the hexadecimal part can change)
const scripts = document.querySelectorAll('script[src$="/cloudflare-static/email-decode.min.js"]')
// querySelectorAll never returns null -- no match yields an empty NodeList --
// so the presence check has to look at the length
if (scripts.length > 0) {
  // drop the proprietary decoder so it can't run, then decode the page ourselves
  scripts.forEach(script => script.remove())
  process(document)
}