// khinsider-downloads/main.ts

import path from 'node:path'
import { Buffer } from 'node:buffer'
import { parseArgs } from "jsr:@std/cli/parse-args"
import { exists } from "jsr:@std/fs/exists";
import * as cheerio from 'https://esm.sh/cheerio?target=esnext'

// Accepts only album (playlist) pages: exactly one path segment after
// `/album/`, with an optional trailing slash. Case-insensitive.
const REGEX_PLAYLISTURL =
  /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
// Accepts only song pages: exactly two path segments after `/album/`,
// with an optional trailing slash. Case-insensitive.
const REGEX_SONGURL =
  /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
// Captures (group 1) the shortest leading text of a string that is followed
// by " MP3 - Download"; used to pull the album name out of the page title.
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
// Matches every character that is NOT on the file-name allow-list
// (letters a-z in either case, digits, space and `-_=+,.()[]{}`).
// Global, so a single replace() strips all of them.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi

// Parse the command line. `url` and `output` are string flags, `sync` and
// `help` are boolean flags; each one also has a single-letter alias.
const flags = parseArgs(Deno.args, {
  alias: {
    "url": ["u"],
    "output": ["o"],
    "sync": ["s"],
    "help": ["h"],
  },
  string: ["url", "output"],
  boolean: ["sync", "help"],
  // values used when a flag is not given on the command line
  default: {
    "url": "",
    "output": ".",
    "sync": false
  }
})

// The url may also be passed positionally: when --url is empty, fall back to
// the first positional argument.
if (!flags.url && flags._.length > 0) {
  flags.url = flags._[0].toString()
}

/**
 * Writes the command-line usage text to stdout with a single
 * `console.log` call.
 */
function printHelp() {
  const usage = `deno
    --allow-net --allow-write --allow-read main.ts
    [--url] <url>
    [--output <downloadPath>]
    [--sync]
    [--help]

parameters:
    --url -u (default)
        url to download

    --output -o
        default: "."
        output path

    --sync -s
        download files one at a time

    --help -h
        print help message
`
  console.log(usage)
}

/**
 * Fetches the playlist found at `flags.url` and downloads every song of it
 * into `flags.output` (or "." when that is empty).
 *
 * With `flags.sync` set the songs are downloaded strictly one after another;
 * otherwise all downloads are started together and awaited as a group.
 */
async function main() {
  // load all song details
  const songs = await fetchPlaylist(flags.url)
  const destination = flags.output || '.'

  if (!flags.sync) {
    console.log('downloading all at once\n')
    await Promise.all(songs.map((song) => downloadSong(song, destination)))
    return
  }

  console.log('downloading one at a time\n')
  for (const song of songs) {
    await downloadSong(song, destination)
  }
}

/**
 * Loads an album page and returns one entry per song listed on it.
 *
 * @param url album page url; must match REGEX_PLAYLISTURL
 * @returns the songs parsed from the page's song table
 * @throws a string message when the url is not accepted or the album name
 *         cannot be read from the page title
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  const isPlaylistUrl = REGEX_PLAYLISTURL.test(url)
  if (!isPlaylistUrl) {
    throw `unaccepted url ${url}`
  }

  console.log(`downloading: ${url}`)

  // fetch the album page and parse it into a dom
  const response = await fetch(url)
  const html = await response.text()
  const $ = cheerio.load(html)

  // the album name is the leading part of the page title
  const pageTitle = $.extract({ title: 'title' }).title ?? ''
  const match = REGEX_ALBUMTITLE.exec(pageTitle)
  if (match === null) {
    throw `unable to grab album name from ${pageTitle}`
  }
  const [, albumName] = match
  console.log(`  title: ${albumName}`)

  // locate the interesting columns, then read every song row
  const songs = getPlaylistRows($, getPlaylistTableHeaders($), albumName)
  console.log(`  songs: ${songs.length}\n`)

  return songs
}

/**
 * Positions of the columns of interest within the song table's header row.
 * getPlaylistTableHeaders leaves a field at -1 when no matching header cell
 * was found.
 */
interface PlaylistTableColumns {
  // position of the "song name" header cell
  name: number
  // position of the "#" (track number) header cell
  track: number
  // position of the "cd" header cell
  cd: number
}

/** One song as parsed from a row of the playlist table. */
interface PlaylistSongData {
  // href of the row's download link ('' when the row has none);
  // downloadSong prefixes the site origin when it does not start with "http"
  url: string
  // album name taken from the page title
  album: string
  // text of the row's song-name cell
  name: string
  // parsed track number, or -1 when unavailable
  track: number
  // parsed cd number, or -1 when unavailable
  cd: number
}

/**
 * Finds which header cell of the song table holds the cd number, the track
 * number and the song name.
 *
 * Header texts are compared in lower case against "cd", "#" and "song name".
 * When a label appears more than once the last occurrence wins.
 *
 * @param $ the loaded album page
 * @returns the position of each column, -1 for the ones that are missing
 * @throws a string message when there is no "song name" column
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  // text of every <th> in the header row
  const { cells: headerTexts } = $('#songlist tr#songlist_header').extract({
    cells: ['th'],
  })

  // start with every column marked as "not found"
  const found: PlaylistTableColumns = { name: -1, track: -1, cd: -1 }

  headerTexts.forEach((text, position) => {
    const label = text.toLocaleLowerCase()
    if (label === 'cd') {
      found.cd = position
    } else if (label === '#') {
      found.track = position
    } else if (label === 'song name') {
      found.name = position
    }
  })

  // the name column is the only mandatory one
  if (found.name === -1) {
    throw 'unable to find song title column'
  }

  return found
}

/**
 * Reads every song row of the playlist table (all rows except the header and
 * footer rows) into a PlaylistSongData entry.
 *
 * @param $ the loaded album page
 * @param columns positions of the name / track / cd cells inside a row
 * @param albumName album name stored on every returned entry
 * @returns one entry per row, in table order
 * @throws a string message when a row has no (or an empty) song-name cell
 */
function getPlaylistRows(
  $: cheerio.CheerioAPI,
  columns: PlaylistTableColumns,
  albumName: string
): PlaylistSongData[] {
  // Reads the number in cell `index`; yields -1 when the column is absent
  // (negative index), the cell is missing, or its text is not a number.
  const numberAt = (cells: readonly string[], index: number): number => {
    if (index < 0) {
      return -1
    }
    const parsed = parseInt(cells[index] ?? '-1')
    return isNaN(parsed) ? -1 : parsed
  }

  const songs: PlaylistSongData[] = []
  const songRows = $('#songlist tr:not(#songlist_header):not(#songlist_footer)')

  // loop through each song in table
  songRows.each((_, rowEl) => {
    const row = cheerio.load(rowEl)

    // href of the row's download link, '' when there is none
    const url =
      row.extract({
        url: {
          selector: 'td.playlistDownloadSong a',
          value: 'href',
        },
      }).url ?? ''

    // text of every cell in the row
    const { cells } = row.extract({ cells: ['td'] })

    const name = cells[columns.name]
    if (!name) {
      throw `unable to grab song name from ${albumName} in row ${columns.name} - ${cells}`
    }

    songs.push({
      url,
      album: albumName,
      name,
      track: numberAt(cells, columns.track),
      cd: numberAt(cells, columns.cd),
    })
  })

  return songs
}

/**
 * Downloads the FLAC file of one song into `<location>/<album>/`.
 *
 * Steps: validate the song page url, skip when the target file already
 * exists, load the song page, pull the FLAC link out of it, make sure the
 * album folder exists, then fetch the file and write it to disk.
 *
 * @param song the playlist entry to download
 * @param location base output directory
 * @returns resolves once the file is written, or immediately when skipped
 * @throws a string message (the style used throughout this file) when the
 *         url is not accepted, a request does not succeed, or the page has
 *         no FLAC link
 */
async function downloadSong(song: PlaylistSongData, location: string) {
  // get full url: prefix the site origin when the link is not absolute
  let url = song.url
  if (!/^http/i.test(url)) {
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw `unaccepted url ${url}`
  }

  // get the file and path to save the files
  const { pathname, fullpathname } = pathFor(location, song)

  // skip file if it exists; checked before any request is made so that a
  // skipped song costs no network traffic
  if (await exists(fullpathname)) {
    console.log(`skipping file already exists ${fullpathname}`)
    return
  }

  // load download page
  const resp = await fetch(url)
  if (!resp.ok) {
    throw `unable to load song page ${url} (${resp.status} ${resp.statusText})`
  }
  const text = await resp.text()
  const $ = cheerio.load(text)

  // extract the flac download link
  const flacUrl = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href',
    },
  }).url

  if (!flacUrl) {
    throw `can't find download link for ${url}`
  }

  // ensure folder exists; `recursive` makes this a no-op when the folder is
  // already there, so concurrent downloads of the same album cannot fail on
  // an "already exists" race, and a missing `location` is created as well
  await Deno.mkdir(pathname, { recursive: true })

  console.log(`downloading ${fullpathname}`)
  console.log(`  from ${flacUrl}`)

  // download the file; never write an error response body to disk
  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    throw `unable to download ${flacUrl} (${songresp.status} ${songresp.statusText})`
  }
  const songblob = await songresp.arrayBuffer()
  return Deno.writeFile(fullpathname, toBuffer(songblob))
}

/** Where a song is stored on disk, as computed by pathFor. */
interface SongPath {
  // resolved path of the album folder
  pathname: string
  // resolved path of the song file inside the album folder
  fullpathname: string
}

/**
 * Computes where a song is stored: `<location>/<album>/<file>.flac`.
 *
 * Album and song names are stripped of every character matched by
 * REGEX_UNSAFEFORFILE. The file name is prefixed with the cd and track
 * numbers when they are known (>= 0):
 *
 *   name 'song', cd  1, track  1  ->  '1.1. song.flac'
 *   name 'song', cd -1, track  1  ->  '1. song.flac'
 *   name 'song', cd -1, track -1  ->  'song.flac'
 *
 * @param location base output directory
 * @param song the song to compute the paths for
 * @returns the resolved album folder and the resolved file path inside it
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  // clean strings for file paths
  const sanitize = (text: string): string =>
    text.replace(REGEX_UNSAFEFORFILE, '')

  const hasCd = song.cd >= 0
  const hasTrack = song.track >= 0

  // build the "<cd>.<track>. " prefix piece by piece
  let prefix = ''
  if (hasCd) {
    prefix += `${song.cd}.`
  }
  if (hasTrack) {
    prefix += `${song.track}.`
  }
  if (hasCd || hasTrack) {
    prefix += ' '
  }

  const pathname = path.resolve(location, sanitize(song.album))
  const fullpathname = path.resolve(
    pathname,
    `${prefix}${sanitize(song.name)}.flac`
  )

  return { pathname, fullpathname }
}

/**
 * Converts an ArrayBuffer to a Buffer<ArrayBuffer>.
 *
 * The bytes are copied into a newly allocated Buffer, so the result does not
 * share memory with `arrayBuffer`.
 *
 * @param arrayBuffer the bytes to copy
 * @returns a Buffer of the same length holding the same bytes
 */
function toBuffer(arrayBuffer: ArrayBuffer): Buffer<ArrayBuffer> {
  const buffer = Buffer.alloc(arrayBuffer.byteLength)
  // bulk copy instead of a per-byte loop
  buffer.set(new Uint8Array(arrayBuffer))
  return buffer
}

// Entry point. An explicit --help wins over everything else, so asking for
// help without a url no longer reports "Missing URL" first.
if (flags.help) {
  printHelp()
} else if (!flags.url) {
  // no url given by flag or positionally: explain, then show usage
  console.log('Missing URL\n\n')
  printHelp()
} else {
  // report a failed run instead of leaving an unhandled rejection
  main().catch((e) => console.error(e))
}