// khinsider-downloads/main.ts

import { existsSync } from 'node:fs'
import { mkdir, writeFile } from 'node:fs/promises'
import path from 'node:path'
import process from 'node:process'
import { argv } from 'node:process'
import { Buffer } from "node:buffer"
import * as cheerio from "https://esm.sh/cheerio?target=esnext"

// Album page URL on downloads.khinsider.com: exactly one path segment after
// /album/, with an optional trailing slash (case-insensitive).
const REGEX_PLAYLISTURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
// Song page URL: same prefix as the album URL followed by exactly one more
// path segment, with an optional trailing slash (case-insensitive).
const REGEX_SONGURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
// Captures (group 1) whatever precedes " MP3 - Download" at the start of the
// page <title>; used as the album name.
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
// Matches every character that is NOT a letter, digit, space or one of
// - _ = + , . ( ) [ ] { } so it can be stripped from file and directory names.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi

/**
 * CLI entry point: downloads every song of an album page.
 *
 * Arguments are read from `argv`:
 *  - `argv[2]` album URL (required)
 *  - `argv[3]` destination directory, defaults to `.`
 *  - `argv[4]` `sync` (case-insensitive) to download one song at a time;
 *    anything else downloads all songs in parallel
 *
 * @throws Error when the album URL is missing, when the playlist cannot be
 *   fetched, or when one or more downloads fail.
 */
async function main() {
  const playlistURL = argv[2]
  if (!playlistURL) {
    throw new Error('missing album url; usage: main.ts <album-url> [download-path] [sync]')
  }
  const downloadPath = argv[3] || '.'
  const runSync = argv[4]?.toLowerCase() === 'sync'

  const playlist = await fetchPlaylist(playlistURL)

  if (runSync) {
    // Sequential mode stops at the first failing song.
    for (const song of playlist) {
      await downloadSong(song, downloadPath)
    }
    return
  }

  // Parallel mode: wait for every download to settle so that one failure
  // does not hide the outcome of the others.
  const results = await Promise.allSettled(playlist.map(song => downloadSong(song, downloadPath)))
  const failures = results.filter((r): r is PromiseRejectedResult => r.status === 'rejected')
  for (const failure of failures) {
    console.error(failure.reason)
  }
  if (failures.length > 0) {
    throw new Error(`${failures.length} of ${playlist.length} downloads failed`)
  }
}

/**
 * Fetches an album page and extracts the list of songs it links to.
 *
 * @param url Album page URL; must match `REGEX_PLAYLISTURL`.
 * @returns One entry per song row found in the page's song list table.
 * @throws Error when the URL is not an accepted album URL, when the server
 *   answers with a non-2xx status, or when the album name cannot be read
 *   from the page title.
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  if (!REGEX_PLAYLISTURL.test(url)) {
    throw new Error(`unaccepted url ${url}`)
  }

  const resp = await fetch(url)
  if (!resp.ok) {
    // Without this check an error page would be parsed as if it were an album.
    throw new Error(`unable to fetch ${url}: ${resp.status} ${resp.statusText}`)
  }
  const html = await resp.text()

  const $ = cheerio.load(html)

  // The album name is the part of <title> that precedes " MP3 - Download".
  const title = $.extract({
    title: 'title'
  }).title ?? ''
  const titleMatch = REGEX_ALBUMTITLE.exec(title)
  if (titleMatch === null) {
    throw new Error(`unable to grab album name from ${title}`)
  }
  const albumName = titleMatch[1]

  const columns = getPlaylistTableHeaders($)
  const rows = getPlaylistRows($, columns, albumName)

  console.log(`downloading ${rows.length} songs`)

  return rows
}

/**
 * Zero-based positions of the interesting columns within the song list
 * header cells. A value of -1 means the column was not found.
 */
interface PlaylistTableColumns {
  /** Index of the header cell reading "song name". */
  name: number
  /** Index of the header cell reading "#". */
  track: number
  /** Index of the header cell reading "cd". */
  cd: number
}
/** Data collected for one song row of the album's song list table. */
interface PlaylistSongData {
  /** `href` of the row's download link ('' when the row has none); may be site-relative. */
  url: string
  /** Album name taken from the album page title. */
  album: string
  /** Text of the song name cell. */
  name: string
  /** Track number parsed from the "#" column, or -1 when unavailable. */
  track: number
  /** Disc number parsed from the "cd" column, or -1 when unavailable. */
  cd: number
}

/**
 * Locates the "cd", "#" and "song name" columns in the song list header row.
 *
 * Header text is compared after trimming surrounding whitespace and
 * lower-casing (locale-independent), so markup formatting does not matter.
 * When a label occurs more than once the last occurrence wins.
 *
 * @param $ Loaded album page.
 * @returns Column indexes; `track` and `cd` are -1 when the page has no such column.
 * @throws Error when the song name column cannot be found.
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  const headerCells = $('#songlist tr#songlist_header').extract({
    cells: ['th']
  }).cells

  const indexes: PlaylistTableColumns = {
    name: -1,
    track: -1,
    cd: -1,
  }

  headerCells.forEach((cell, i) => {
    switch (cell.trim().toLowerCase()) {
      case 'cd':
        indexes.cd = i
        break
      case '#':
        indexes.track = i
        break
      case 'song name':
        indexes.name = i
        break
    }
  })

  if (indexes.name === -1) {
    throw new Error('unable to find song title column')
  }

  return indexes
}

/**
 * Reads every song row (all rows of `#songlist` except header and footer)
 * into a `PlaylistSongData` record.
 *
 * @param $ Loaded album page.
 * @param columns Column indexes as returned by `getPlaylistTableHeaders`.
 * @param albumName Album name stored on every returned record.
 * @returns One record per row, in document order.
 * @throws Error when a row has no (non-empty) song name cell.
 */
function getPlaylistRows($: cheerio.CheerioAPI, columns: PlaylistTableColumns, albumName: string): PlaylistSongData[] {
  // Parses the cell at `index` as a base-10 integer; -1 stands for
  // "column absent", "cell missing" or "not a number".
  const numberAt = (cells: string[], index: number): number => {
    if (index < 0) {
      return -1
    }
    const parsed = parseInt(cells[index] ?? '', 10)
    return Number.isNaN(parsed) ? -1 : parsed
  }

  return $('#songlist tr:not(#songlist_header):not(#songlist_footer)')
    .toArray()
    .map((rowEl): PlaylistSongData => {
      // Wrap the row in place instead of cheerio.load(rowEl), which would
      // re-parent the element into a brand-new document.
      const row = $(rowEl)

      const url = row.extract({
        url: {
          selector: 'td.playlistDownloadSong a',
          value: 'href'
        }
      }).url ?? ''

      const cells = row.extract({
        cells: ['td']
      }).cells

      // Trim so stray markup whitespace does not end up in file names.
      const name = cells[columns.name]?.trim()
      if (!name) {
        throw new Error(`unable to grab song name from ${albumName} in column ${columns.name} - ${JSON.stringify(cells)}`)
      }

      return {
        url,
        album: albumName,
        name,
        track: numberAt(cells, columns.track),
        cd: numberAt(cells, columns.cd),
      }
    })
}

/**
 * Downloads the FLAC file of one song into `<location>/<album>/`.
 *
 * The song page is fetched to discover the FLAC link, then the file itself
 * is fetched and written to disk. Songs whose target file already exists
 * are skipped without any network request.
 *
 * @param song Song record; `song.url` may be absolute or site-relative.
 * @param location Base directory that will contain the album directory.
 * @throws Error when the song URL is not accepted, a request answers with a
 *   non-2xx status, or the song page contains no FLAC link.
 */
async function downloadSong(song: PlaylistSongData, location: string): Promise<void> {
  let url = song.url
  if (!/^http/i.test(url)) {
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw new Error(`unaccepted url ${url}`)
  }

  const { pathname, fullpathname } = pathFor(location, song)

  if (existsSync(fullpathname)) {
    console.log(`skipping file already exists ${fullpathname}`)
    // Actually skip: without this return the file was downloaded again.
    return
  }

  const resp = await fetch(url)
  if (!resp.ok) {
    throw new Error(`unable to fetch ${url}: ${resp.status} ${resp.statusText}`)
  }
  const text = await resp.text()

  const $ = cheerio.load(text)
  const flacHref = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href'
    }
  }).url

  if (!flacHref) {
    throw new Error(`can't find download link for ${url}`)
  }
  // Resolve against the song page so a relative href also works.
  const flacUrl = new URL(flacHref, url).href

  // recursive: true is a no-op when the directory exists, so concurrent
  // downloads of the same album cannot fail with EEXIST.
  await mkdir(pathname, { recursive: true })

  console.log(`downloading ${fullpathname}`)
  console.log(`  from ${flacUrl}`)

  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    throw new Error(`unable to fetch ${flacUrl}: ${songresp.status} ${songresp.statusText}`)
  }
  const songblob = await songresp.arrayBuffer()

  await writeFile(fullpathname, toBuffer(songblob))
}

/** Resolved on-disk destination of a song, as computed by `pathFor`. */
interface SongPath {
  /** Absolute path of the album directory. */
  pathname: string
  /** Absolute path of the song's .flac file inside the album directory. */
  fullpathname: string
}

/**
 * Computes where a song is stored on disk.
 *
 * The album directory and the file name are built from the album and song
 * names with every character matched by `REGEX_UNSAFEFORFILE` removed. The
 * file name is prefixed with "<cd>." and/or "<track>." followed by a space
 * when those numbers are available (>= 0), and always ends in ".flac".
 *
 * @param location Base directory the album directory is resolved against.
 * @param song Song whose album, name, cd and track determine the path.
 * @returns The absolute album directory and the absolute file path.
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  const sanitize = (raw: string): string => raw.replace(REGEX_UNSAFEFORFILE, '')

  const hasCd = song.cd >= 0
  const hasTrack = song.track >= 0

  // Assemble the optional numeric prefix piece by piece.
  let prefix = ''
  if (hasCd) {
    prefix += `${song.cd}.`
  }
  if (hasTrack) {
    prefix += `${song.track}.`
  }
  if (hasCd || hasTrack) {
    prefix += ' '
  }

  const pathname = path.resolve(location, sanitize(song.album))
  const fullpathname = path.resolve(pathname, `${prefix}${sanitize(song.name)}.flac`)

  return { pathname, fullpathname }
}

/**
 * Copies the bytes of an `ArrayBuffer` into a new Node.js `Buffer`.
 *
 * The returned buffer owns its own memory: later changes to `arrayBuffer`
 * are not reflected in it.
 *
 * @param arrayBuffer Source bytes.
 * @returns A `Buffer` with the same length and contents.
 */
function toBuffer(arrayBuffer: ArrayBuffer) {
  // Buffer.from(Uint8Array) performs a native copy, replacing the manual
  // byte-by-byte loop.
  return Buffer.from(new Uint8Array(arrayBuffer))
}

// Run the CLI. On failure, log the error and set a non-zero exit code so
// that shells and scripts can tell the download did not complete.
main().catch((e: unknown) => {
  console.error(e)
  process.exitCode = 1
})