// Files
// khinsider-downloads/main.ts
// 2025-10-11 00:57:35 -05:00
//
// 239 lines
// 5.5 KiB
// TypeScript

import { existsSync } from 'node:fs'
import { mkdir, writeFile } from 'node:fs/promises'
import path from 'node:path'
import { argv } from 'node:process'
import { Buffer } from 'node:buffer'
import * as cheerio from 'https://esm.sh/cheerio?target=esnext'
// Accepts an album page URL on downloads.khinsider.com: exactly one path
// segment after `/game-soundtracks/album/`, with an optional trailing slash.
const REGEX_PLAYLISTURL =
/^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
// Accepts a song page URL: the album URL followed by exactly one more path
// segment, with an optional trailing slash.
const REGEX_SONGURL =
/^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
// Captures (group 1) the text that precedes " MP3 - Download" at the start of
// the page title; fetchPlaylist uses that capture as the album name.
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
// Matches every character that is NOT a letter, digit, space, or one of
// - _ = + , . ( ) [ ] { }. pathFor strips these matches from album and song
// names before building file system paths.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi
/**
 * Command line entry point.
 *
 * Arguments (read from `argv`):
 *   argv[2] - album page URL (required)
 *   argv[3] - directory to download into; defaults to '.'
 *   argv[4] - pass "sync" (case-insensitive) to download songs one at a time
 *             instead of all at once
 *
 * @throws A string message when the album URL argument is missing; errors
 *   from fetching the playlist or downloading a song propagate unchanged.
 */
async function main() {
  const playlistURL = argv[2]
  if (!playlistURL) {
    // Fail with a usage hint instead of passing `undefined` further down.
    throw 'missing album url - usage: <album url> [download path] [sync]'
  }
  // `||` (not `??`) on purpose: an empty-string argument also falls back.
  const downloadPath = argv[3] || '.'
  const runSync = argv[4]?.toLowerCase() === 'sync'
  const playlist = await fetchPlaylist(playlistURL)
  if (runSync) {
    // Sequential mode: each download finishes before the next one starts.
    for (const song of playlist) {
      await downloadSong(song, downloadPath)
    }
  } else {
    // Default mode: every download is started immediately.
    await Promise.all(playlist.map((song) => downloadSong(song, downloadPath)))
  }
}
/**
 * Fetches an album page and extracts the songs listed on it.
 *
 * @param url Album page URL; must match REGEX_PLAYLISTURL.
 * @returns One entry per song row found by getPlaylistRows.
 * @throws A string message when the URL is rejected, the page request does
 *   not succeed, or the album name cannot be read from the page title.
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  if (!REGEX_PLAYLISTURL.test(url)) {
    throw `unaccepted url ${url}`
  }
  const resp = await fetch(url)
  if (!resp.ok) {
    // Without this check an HTTP error page would be parsed and reported
    // as a confusing "unable to grab album name" failure.
    throw `unable to fetch ${url} (${resp.status} ${resp.statusText})`
  }
  const text = await resp.text()
  const $ = cheerio.load(text)
  const title =
    $.extract({
      title: 'title',
    }).title ?? ''
  const titleMatch = REGEX_ALBUMTITLE.exec(title)
  if (titleMatch === null) {
    throw `unable to grab album name from ${title}`
  }
  const albumName = titleMatch[1]
  const columns = getPlaylistTableHeaders($)
  const rows = getPlaylistRows($, columns, albumName)
  console.log(`downloading ${rows.length} songs`)
  return rows
}
/**
 * Positions of the columns of interest within the song list table header.
 * getPlaylistTableHeaders leaves a field at -1 when no matching header cell
 * was found.
 */
interface PlaylistTableColumns {
// index of the "song name" header cell
name: number
// index of the "#" header cell
track: number
// index of the "cd" header cell
cd: number
}
/**
 * Data collected for one song row of an album's song list table.
 */
interface PlaylistSongData {
// href of the row's `td.playlistDownloadSong a` link ('' when absent);
// downloadSong prefixes the site host when it does not start with "http"
url: string
// album name taken from the album page title
album: string
// text of the row's cell in the song name column
name: string
// parsed track number, or -1 when the column is missing or not numeric
track: number
// parsed disc number, or -1 when the column is missing or not numeric
cd: number
}
/**
 * Locates the song name, track number and disc number columns by reading the
 * header row of the song list table.
 *
 * @param $ Loaded album page.
 * @returns Column indexes; `track` and `cd` stay -1 when their header cell is
 *   not present.
 * @throws A string message when no "song name" header cell exists.
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  const { cells } = $('#songlist tr#songlist_header').extract({
    cells: ['th'],
  })
  const found = {
    name: -1,
    track: -1,
    cd: -1,
  }
  cells.forEach((label, position) => {
    // Header text is compared case-insensitively.
    const key = label.toLocaleLowerCase()
    if (key === 'cd') {
      found.cd = position
    } else if (key === '#') {
      found.track = position
    } else if (key === 'song name') {
      found.name = position
    }
  })
  if (found.name === -1) {
    throw 'unable to find song title column'
  }
  return found
}
/**
 * Reads every song row of the song list table (header and footer rows are
 * excluded by the selector) into a PlaylistSongData record.
 *
 * @param $ Loaded album page.
 * @param columns Column indexes as returned by getPlaylistTableHeaders.
 * @param albumName Album name stored on every returned record.
 * @returns One record per row, in document order.
 * @throws A string message when a row has no text in the song name column.
 */
function getPlaylistRows(
  $: cheerio.CheerioAPI,
  columns: PlaylistTableColumns,
  albumName: string
): PlaylistSongData[] {
  // Numeric cell lookup: -1 when the column is absent or the text is not a number.
  const numberAt = (cells: string[], column: number): number => {
    if (column < 0) {
      return -1
    }
    const parsed = parseInt(cells[column] ?? '-1')
    return isNaN(parsed) ? -1 : parsed
  }

  const songs: PlaylistSongData[] = []
  $('#songlist tr:not(#songlist_header):not(#songlist_footer)').each(
    (_, element) => {
      const $row = cheerio.load(element)
      const extracted = $row.extract({
        url: {
          selector: 'td.playlistDownloadSong a',
          value: 'href',
        },
        cells: ['td'],
      })
      const rowCells = extracted.cells
      const name = rowCells[columns.name]
      if (!name) {
        throw `unable to grab song name from ${albumName} in row ${columns.name} - ${rowCells}`
      }
      songs.push({
        url: extracted.url ?? '',
        album: albumName,
        name,
        track: numberAt(rowCells, columns.track),
        cd: numberAt(rowCells, columns.cd),
      })
    }
  )
  return songs
}
/**
 * Downloads the FLAC file of one song into `<location>/<album>/`.
 *
 * The song page is fetched to find the first link under `#pageContent` whose
 * href contains "flac"; that link is then downloaded and written to the path
 * computed by pathFor. A song whose target file already exists is skipped.
 *
 * @param song Song record; `song.url` may be absolute or site-relative.
 * @param location Base directory that receives the album directory.
 * @throws A string message when the song URL is rejected, a request does not
 *   succeed, or no FLAC link is present on the song page.
 */
async function downloadSong(song: PlaylistSongData, location: string) {
  let url = song.url
  if (!/^http/i.test(url)) {
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw `unaccepted url ${url}`
  }
  const { pathname, fullpathname } = pathFor(location, song)
  if (existsSync(fullpathname)) {
    // Actually skip: previously this only logged and then re-downloaded.
    // Checked before any request so a skipped song costs no network traffic.
    console.log(`skipping file already exists ${fullpathname}`)
    return
  }
  const resp = await fetch(url)
  if (!resp.ok) {
    throw `unable to fetch ${url} (${resp.status} ${resp.statusText})`
  }
  const text = await resp.text()
  const $ = cheerio.load(text)
  const flacUrl = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href',
    },
  }).url
  if (!flacUrl) {
    throw `can't find download link for ${url}`
  }
  // `recursive: true` makes this a no-op when the directory already exists,
  // so parallel downloads of the same album cannot fail with EEXIST, and a
  // not-yet-existing `location` is created as well.
  await mkdir(pathname, { recursive: true })
  console.log(`downloading ${fullpathname}`)
  console.log(` from ${flacUrl}`)
  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    // Never write an HTTP error body to disk as if it were audio.
    throw `unable to download ${flacUrl} (${songresp.status} ${songresp.statusText})`
  }
  const songblob = await songresp.arrayBuffer()
  return writeFile(fullpathname, toBuffer(songblob))
}
/**
 * File system locations computed by pathFor for one song.
 */
interface SongPath {
// absolute path of the album directory
pathname: string
// absolute path of the song file inside the album directory
fullpathname: string
}
/**
 * Builds the album directory and song file paths for a song.
 *
 * Album and song names have every REGEX_UNSAFEFORFILE match removed. The file
 * name is `<cd>.<track>. <song>.flac`, where the disc and track parts are
 * left out when their value is negative.
 *
 * @param location Base directory; resolved to an absolute path.
 * @param song Song whose album, name, cd and track are used.
 * @returns The album directory path and the full file path.
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  const sanitize = (text: string) => text.replace(REGEX_UNSAFEFORFILE, '')
  const hasCd = song.cd >= 0
  const hasTrack = song.track >= 0

  let prefix = ''
  if (hasCd) {
    prefix += song.cd + '.'
  }
  if (hasTrack) {
    prefix += song.track + '.'
  }
  if (hasCd || hasTrack) {
    // A single space separates the numbering from the song name.
    prefix += ' '
  }

  const pathname = path.resolve(location, sanitize(song.album))
  const fullpathname = path.resolve(
    pathname,
    `${prefix}${sanitize(song.name)}.flac`
  )
  return { pathname, fullpathname }
}
/**
 * Copies the bytes of an ArrayBuffer into a new Node.js Buffer.
 *
 * @param arrayBuffer Source bytes.
 * @returns A Buffer of the same length holding a copy of the bytes; later
 *   changes to `arrayBuffer` do not affect it.
 */
function toBuffer(arrayBuffer: ArrayBuffer) {
  // Buffer.from(Uint8Array) copies the data natively, replacing the manual
  // byte-by-byte loop while keeping the result independent of the source.
  return Buffer.from(new Uint8Array(arrayBuffer))
}
// Run the CLI; any failure is reported on stderr.
main().catch((failure) => {
  console.error(failure)
})