Files
khinsider-downloads/main.ts
2025-10-23 02:17:40 -05:00

334 lines
7.3 KiB
TypeScript

import path from 'node:path'
import { Buffer } from 'node:buffer'
import { parseArgs } from "jsr:@std/cli/parse-args"
import { exists } from "jsr:@std/fs/exists";
import * as cheerio from 'https://esm.sh/cheerio?target=esnext'
// Accepts an album (playlist) page url:
// https://downloads.khinsider.com/game-soundtracks/album/<album> with an
// optional trailing slash, matched case-insensitively.
const REGEX_PLAYLISTURL =
/^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i
// Accepts a song page url: the album url plus exactly one more path segment
// (optional trailing slash), matched case-insensitively.
const REGEX_SONGURL =
/^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i
// Captures (group 1) the part of the page title that precedes " MP3 - Download".
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i
// Matches every character that is NOT an ASCII letter, digit, space or one of
// - _ = + , . ( ) [ ] { }; global so that replace() affects all occurrences.
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi
// parse args
// Command line flags: --url/-u and --output/-o are strings, --sync/-s and
// --help/-h are booleans. Defaults: url "", output ".", sync false.
const flags = parseArgs(Deno.args, {
alias: {
"url": ["u"],
"output": ["o"],
"sync": ["s"],
"help": ["h"],
},
string: ["url", "output"],
boolean: ["sync", "help"],
default: {
"url": "",
"output": ".",
"sync": false
}
})
// When no --url was given, fall back to the first positional argument.
if (!flags.url && flags._.length > 0) {
flags.url = flags._[0].toString()
}
/**
 * Writes the command line usage summary to stdout with a single
 * `console.log` call.
 */
function printHelp() {
  // The usage text is kept verbatim; only its emission is separated out.
  const usage = `deno
--allow-net --allow-write --allow-read main.ts
[--url] <url>
[--output <downloadPath>]
[--sync]
[--help]
parameters:
--url -u (default)
url to download
--output -o
default: "."
output path
--sync -s
download files one at a time
--help -h
print help message
`
  console.log(usage)
}
/**
 * Loads the playlist at `flags.url` and downloads every song into
 * `flags.output` (or '.' when that is empty).
 *
 * With `--sync` the songs are downloaded strictly one after another;
 * otherwise all downloads are started together and awaited as a group.
 */
async function main() {
  const songs = await fetchPlaylist(flags.url)
  const targetDir = flags.output || '.'

  if (!flags.sync) {
    console.log('downloading all at once\n')
    await Promise.all(songs.map((entry) => downloadSong(entry, targetDir)))
    return
  }

  console.log('downloading one at a time\n')
  for (const entry of songs) {
    await downloadSong(entry, targetDir)
  }
}
/**
 * Fetches an album page and returns one entry per song row of its song table.
 *
 * @param url album page url; must match REGEX_PLAYLISTURL
 * @returns the parsed song rows, each tagged with the album name
 * @throws a string message when the url is not accepted or the album name
 *   cannot be read from the page title
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  const accepted = REGEX_PLAYLISTURL.test(url)
  if (!accepted) {
    throw `unaccepted url ${url}`
  }
  console.log(`downloading: ${url}`)

  // fetch the album page and parse it into a DOM
  const response = await fetch(url)
  const $ = cheerio.load(await response.text())

  // the album name is the leading part of the <title> text
  const pageTitle = $.extract({ title: 'title' }).title ?? ''
  const nameMatch = REGEX_ALBUMTITLE.exec(pageTitle)
  if (nameMatch === null) {
    throw `unable to grab album name from ${pageTitle}`
  }
  const albumName = nameMatch[1]
  console.log(` title: ${albumName}`)

  // locate the relevant columns, then read every song row
  const songs = getPlaylistRows($, getPlaylistTableHeaders($), albumName)
  console.log(` songs: ${songs.length}\n`)
  return songs
}
/**
 * Positions of the columns of interest within the song table's header row.
 * A value of -1 means the column was not found in the header.
 */
interface PlaylistTableColumns {
/** index of the "song name" header cell */
name: number
/** index of the "#" (track number) header cell */
track: number
/** index of the "cd" header cell */
cd: number
}
/** One song row read from an album page's song table. */
interface PlaylistSongData {
/** href of the row's download link; '' when the row has none */
url: string
/** album name taken from the page title */
album: string
/** text of the song name cell */
name: string
/** track number, or -1 when missing or not numeric */
track: number
/** cd number, or -1 when missing or not numeric */
cd: number
}
/**
 * Finds which header cells of the song table hold the song name, the track
 * number and the cd number.
 *
 * @param $ cheerio handle for the album page
 * @returns the cell index of each column, -1 for a column that is absent
 * @throws a string message when no "song name" column exists
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  // text of every <th> in the header row
  const { cells } = $('#songlist tr#songlist_header').extract({
    cells: ['th'],
  })

  // start with "not found" everywhere and fill in what the header offers
  const found: PlaylistTableColumns = { name: -1, track: -1, cd: -1 }
  cells.forEach((label, position) => {
    const key = label.toLocaleLowerCase()
    if (key === 'cd') {
      found.cd = position
    } else if (key === '#') {
      found.track = position
    } else if (key === 'song name') {
      found.name = position
    }
  })

  // the song name is the only mandatory column
  if (found.name === -1) {
    throw 'unable to find song title column'
  }
  return found
}
/**
 * Reads every song row of the song table (header and footer rows excluded)
 * into a PlaylistSongData entry.
 *
 * @param $ cheerio handle for the album page
 * @param columns cell indexes as returned by getPlaylistTableHeaders
 * @param albumName album name stored on every entry
 * @returns one entry per table row, in document order
 * @throws a string message when a row has no text in the song name cell
 */
function getPlaylistRows(
  $: cheerio.CheerioAPI,
  columns: PlaylistTableColumns,
  albumName: string
): PlaylistSongData[] {
  // cell text -> number; a missing or non-numeric cell becomes -1
  const toNumber = (cell: string | undefined): number => {
    const parsed = parseInt(cell ?? '-1')
    return isNaN(parsed) ? -1 : parsed
  }

  const songRows = $(
    '#songlist tr:not(#songlist_header):not(#songlist_footer)'
  ).toArray()
  const songs: PlaylistSongData[] = []

  for (const rowEl of songRows) {
    const row = cheerio.load(rowEl)

    // download page link of this row
    const { url } = row.extract({
      url: {
        selector: 'td.playlistDownloadSong a',
        value: 'href',
      },
    })

    // text of every cell, addressed through the header indexes
    const { cells } = row.extract({ cells: ['td'] })
    const name = cells[columns.name]
    if (!name) {
      throw `unable to grab song name from ${albumName} in row ${columns.name} - ${cells}`
    }

    songs.push({
      url: url ?? '',
      album: albumName,
      name,
      track: columns.track >= 0 ? toNumber(cells[columns.track]) : -1,
      cd: columns.cd >= 0 ? toNumber(cells[columns.cd]) : -1,
    })
  }

  return songs
}
/**
 * Downloads the FLAC file of one playlist entry below `location`.
 *
 * The song page is fetched and searched for the first link inside
 * `#pageContent` whose href contains "flac"; that file is stored at the path
 * computed by `pathFor`. A file that is already on disk is left untouched.
 *
 * @param song playlist entry to download
 * @param location base output directory; the album folder is created in it
 * @returns resolves once the file is written, or right away when skipped
 * @throws a string message when the song url is not accepted, the song page
 *   has no flac link, or the file request does not succeed
 */
async function downloadSong(
  song: PlaylistSongData,
  location: string
): Promise<void> {
  // get full url: prefix the site origin when the href does not start with http
  let url = song.url
  if (!/^http/i.test(url)) {
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw `unaccepted url ${url}`
  }

  // The target path depends only on the playlist data, so an existing file
  // can be skipped before any network request is made.
  const { pathname, fullpathname } = pathFor(location, song)
  if (await exists(fullpathname)) {
    console.log(`skipping file already exists ${fullpathname}`)
    return
  }

  // load download page
  const resp = await fetch(url)
  const text = await resp.text()
  const $ = cheerio.load(text)

  // extract the flac download link
  const flacUrl = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href',
    },
  }).url
  if (!flacUrl) {
    throw `can't find download link for ${url}`
  }

  console.log(`downloading ${fullpathname}`)
  console.log(` from ${flacUrl}`)

  // download the file; never store an HTTP error body as a .flac
  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    await songresp.body?.cancel()
    throw `download failed with status ${songresp.status} for ${flacUrl}`
  }
  const songblob = await songresp.arrayBuffer()

  // Recursive mkdir succeeds when the folder already exists, so concurrent
  // downloads of the same album cannot fail with AlreadyExists, and a missing
  // output directory is created as well.
  await Deno.mkdir(pathname, { recursive: true })
  await Deno.writeFile(fullpathname, toBuffer(songblob))
}
/** Resolved filesystem locations for one song, as produced by pathFor. */
interface SongPath {
/** resolved path of the album folder */
pathname: string
/** resolved path of the song file inside the album folder */
fullpathname: string
}
/**
 * Computes where a song is stored: `<location>/<album>/<file>.flac`.
 *
 * Album and song names have every character matched by REGEX_UNSAFEFORFILE
 * removed. The file name is prefixed with the cd and track numbers when they
 * are known (>= 0), each followed by a dot, then a single space:
 *
 *   cd 1,  track 1  -> '1.1. song.flac'
 *   cd -1, track 1  -> '1. song.flac'
 *   cd -1, track -1 -> 'song.flac'
 *
 * @param location base output directory
 * @param song playlist entry to build the path for
 * @returns the resolved album folder and the resolved file path
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  // strip characters that are not allowed in file names
  const sanitize = (value: string): string =>
    value.replace(REGEX_UNSAFEFORFILE, '')

  // known numbers, cd first, each rendered as "<n>."
  const numbering = [song.cd, song.track]
    .filter((n) => n >= 0)
    .map((n) => `${n}.`)
  const prefix = numbering.length > 0 ? `${numbering.join('')} ` : ''

  const filename = `${prefix}${sanitize(song.name)}.flac`
  const pathname = path.resolve(location, sanitize(song.album))
  return {
    pathname,
    fullpathname: path.resolve(pathname, filename),
  }
}
/**
 * Converts an ArrayBuffer to a Buffer<ArrayBuffer> holding a copy of its bytes.
 *
 * @param arrayBuffer source bytes
 * @returns a new Buffer of the same length; later writes to `arrayBuffer`
 *   do not affect it
 */
function toBuffer(arrayBuffer: ArrayBuffer): Buffer<ArrayBuffer> {
  // Buffer.from(Uint8Array) copies the data in one call, replacing the
  // manual byte-by-byte loop.
  return Buffer.from(new Uint8Array(arrayBuffer))
}
// Entry point: choose between help, usage error and download.
if (flags.help) {
  // an explicit help request wins, even when no url was supplied
  printHelp()
} else if (!flags.url) {
  console.log('Missing URL\n\n')
  printHelp()
} else {
  // failures from main are logged to stderr
  main().catch((e) => console.error(e))
}