334 lines
7.3 KiB
TypeScript
334 lines
7.3 KiB
TypeScript
import path from 'node:path'
|
|
import { Buffer } from 'node:buffer'
|
|
import { parseArgs } from "jsr:@std/cli/parse-args"
|
|
import { exists } from "jsr:@std/fs/exists";
|
|
import * as cheerio from 'https://esm.sh/cheerio?target=esnext'
|
|
|
|
/** Accepts an album (playlist) page url, with or without a trailing slash. */
const REGEX_PLAYLISTURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/?$/i

/** Accepts a song page url: an album url followed by exactly one more path segment. */
const REGEX_SONGURL = /^https:\/\/downloads\.khinsider\.com\/game-soundtracks\/album\/[^\/]+\/[^\/]+\/?$/i

/** Captures (group 1) whatever precedes " MP3 - Download" at the start of a page title. */
const REGEX_ALBUMTITLE = /^(.*?) MP3 - Download/i

/**
 * Matches every character that is NOT a letter, a digit, a space or one of
 * - _ = + , . ( ) [ ] { } so those characters can be stripped from file names.
 */
const REGEX_UNSAFEFORFILE = /[^a-z0-9\-_=+,.()\[\]{} ]/gi
|
|
|
|
// Read the command line: two string options (url, output) and two boolean
// switches (sync, help), each reachable through a one-letter alias.
const flags = parseArgs(Deno.args, {
  string: ['url', 'output'],
  boolean: ['sync', 'help'],
  alias: {
    url: ['u'],
    output: ['o'],
    sync: ['s'],
    help: ['h'],
  },
  default: {
    url: '',
    output: '.',
    sync: false,
  },
})

// When no --url was passed, treat the first positional argument as the url.
if (!flags.url && flags._.length !== 0) {
  const [firstPositional] = flags._
  flags.url = String(firstPositional)
}
|
|
|
|
/**
 * Writes the usage line and the description of every command-line parameter
 * to stdout.
 */
function printHelp() {
  const usage = [
    'deno',
    '--allow-net --allow-write --allow-read main.ts',
    '[--url] <url>',
    '[--output <downloadPath>]',
    '[--sync]',
    '[--help]',
  ]

  const parameters = [
    'parameters:',
    '--url -u (default)',
    'url to download',
    '',
    '--output -o',
    'default: "."',
    'output path',
    '',
    '--sync -s',
    'download files one at a time',
    '',
    '--help -h',
    'print help message',
  ]

  // the trailing empty entry keeps the final newline of the help text
  console.log([...usage, '', ...parameters, ''].join('\n'))
}
|
|
|
|
/**
 * Loads the playlist for the url given on the command line and downloads
 * every song in it: one after another when --sync is set, otherwise all of
 * them concurrently.
 */
async function main() {
  const songs = await fetchPlaylist(flags.url)

  // every download goes to the same place: --output, or "." when it is empty
  const download = (song: PlaylistSongData) =>
    downloadSong(song, flags.output || '.')

  if (!flags.sync) {
    console.log('downloading all at once\n')
    await Promise.all(songs.map((song) => download(song)))
    return
  }

  console.log('downloading one at a time\n')
  for (const song of songs) {
    await download(song)
  }
}
|
|
|
|
/**
 * Loads an album page and returns the details of every song listed in its
 * song table.
 *
 * @param url album page url; it must match REGEX_PLAYLISTURL
 * @returns one entry per song row found on the page
 * @throws a string message when the url is not accepted or when the album
 *   name cannot be read from the page title
 */
async function fetchPlaylist(url: string): Promise<PlaylistSongData[]> {
  const accepted = REGEX_PLAYLISTURL.test(url)
  if (!accepted) {
    throw `unaccepted url ${url}`
  }
  console.log(`downloading: ${url}`)

  // fetch the album page and parse it
  const response = await fetch(url)
  const $ = cheerio.load(await response.text())

  // the album name is the leading part of the page title
  const pageTitle = $.extract({ title: 'title' }).title ?? ''
  const matched = pageTitle.match(REGEX_ALBUMTITLE)
  if (matched === null) {
    throw `unable to grab album name from ${pageTitle}`
  }
  const [, albumName] = matched
  console.log(` title: ${albumName}`)

  // locate the interesting columns, then read every song row
  const songs = getPlaylistRows($, getPlaylistTableHeaders($), albumName)
  console.log(` songs: ${songs.length}\n`)
  return songs
}
|
|
|
|
/**
 * Zero-based positions of the columns of the song table that are read for
 * each song. A position of -1 means the column was not found.
 */
interface PlaylistTableColumns {
  /** position of the "song name" column */
  name: number
  /** position of the "#" (track number) column */
  track: number
  /** position of the "cd" column */
  cd: number
}
|
|
|
|
/** Everything known about one song row of an album page. */
interface PlaylistSongData {
  /** href of the row's download link; may be relative to the site root */
  url: string
  /** name of the album the song belongs to */
  album: string
  /** text of the row's song name cell */
  name: string
  /** track number, or -1 when it is unknown */
  track: number
  /** cd number, or -1 when it is unknown */
  cd: number
}
|
|
|
|
/**
 * Reads the header row of the song table and works out which column holds
 * the cd number, the track number and the song name.
 *
 * @param $ the loaded album page
 * @returns the zero-based position of each column; cd and track stay at -1
 *   when the table has no such column
 * @throws a string message when there is no "song name" column
 */
function getPlaylistTableHeaders($: cheerio.CheerioAPI): PlaylistTableColumns {
  // text of every header cell, in table order
  const { cells: headings } = $('#songlist tr#songlist_header').extract({
    cells: ['th'],
  })

  // start with "not found" everywhere and fill in what the header provides
  const found: PlaylistTableColumns = { name: -1, track: -1, cd: -1 }
  headings.forEach((heading, position) => {
    const label = heading.toLocaleLowerCase()
    if (label === 'cd') {
      found.cd = position
    } else if (label === '#') {
      found.track = position
    } else if (label === 'song name') {
      found.name = position
    }
  })

  // the song name is the only column the rest of the script cannot do without
  if (found.name === -1) {
    throw 'unable to find song title column'
  }
  return found
}
|
|
|
|
/**
 * Reads every song row of the song table (the header and footer rows are
 * left out) into a PlaylistSongData entry.
 *
 * @param $ the loaded album page
 * @param columns positions of the name / track / cd columns
 * @param albumName album name stored on every returned entry
 * @returns one entry per song row, in table order
 * @throws a string message when a row has no song name
 */
function getPlaylistRows(
  $: cheerio.CheerioAPI,
  columns: PlaylistTableColumns,
  albumName: string
): PlaylistSongData[] {
  // numeric value of a cell; -1 when the column is absent or the cell is not a number
  const numberAt = (cells: readonly string[], column: number): number => {
    if (column >= 0) {
      const parsed = parseInt(cells[column] ?? '-1')
      return isNaN(parsed) ? -1 : parsed
    }
    return -1
  }

  return $('#songlist tr:not(#songlist_header):not(#songlist_footer)')
    .toArray()
    .map((rowEl): PlaylistSongData => {
      const row = cheerio.load(rowEl)

      // link to the song's download page
      const { url } = row.extract({
        url: {
          selector: 'td.playlistDownloadSong a',
          value: 'href',
        },
      })

      // text of every cell of the row
      const { cells } = row.extract({ cells: ['td'] })

      const name = cells[columns.name]
      if (!name) {
        throw `unable to grab song name from ${albumName} in row ${columns.name} - ${cells}`
      }

      return {
        url: url ?? '',
        album: albumName,
        name,
        track: numberAt(cells, columns.track),
        cd: numberAt(cells, columns.cd),
      }
    })
}
|
|
|
|
/**
 * Downloads the flac file of one song into `<location>/<album name>/`.
 *
 * A song whose target file already exists is skipped without any network
 * request. The album folder is created when needed.
 *
 * @param song the song to download, as read from the album page
 * @param location folder under which the album folder is placed
 * @returns a promise that settles once the file is written (or skipped)
 * @throws a string message when the song url is not accepted, when a page or
 *   file cannot be loaded, or when the song page has no flac link
 */
async function downloadSong(song: PlaylistSongData, location: string) {
  // the playlist may list site-relative links; turn them into full urls
  let url = song.url
  if (!/^http/i.test(url)) {
    url = 'https://downloads.khinsider.com' + url
  }
  if (!REGEX_SONGURL.test(url)) {
    throw `unaccepted url ${url}`
  }

  // get the file and path to save the file
  const { pathname, fullpathname } = pathFor(location, song)

  // skip the song (before any request is made) if its file already exists
  if (await exists(fullpathname)) {
    console.log(`skipping file already exists ${fullpathname}`)
    return
  }

  // load the song's download page
  const resp = await fetch(url)
  if (!resp.ok) {
    await resp.body?.cancel()
    throw `unable to load ${url} (status ${resp.status})`
  }
  const $ = cheerio.load(await resp.text())

  // extract the flac download link
  const flacUrl = $.extract({
    url: {
      selector: '#pageContent a[href*="flac"]',
      value: 'href',
    },
  }).url
  if (!flacUrl) {
    throw `can't find download link for ${url}`
  }

  // ensure the folder exists; `recursive` makes this a no-op when another
  // concurrent download created it first and also creates missing parents
  await Deno.mkdir(pathname, { recursive: true })

  console.log(`downloading ${fullpathname}`)
  console.log(` from ${flacUrl}`)

  // download the file; never write an error response to disk as a song
  const songresp = await fetch(flacUrl)
  if (!songresp.ok) {
    await songresp.body?.cancel()
    throw `unable to download ${flacUrl} (status ${songresp.status})`
  }
  const songblob = await songresp.arrayBuffer()
  return Deno.writeFile(fullpathname, toBuffer(songblob))
}
|
|
|
|
/** Where a song is stored on disk. */
interface SongPath {
  /** absolute path of the album folder */
  pathname: string
  /** absolute path of the song file inside the album folder */
  fullpathname: string
}
|
|
|
|
/**
 * Works out where a song is stored: `<location>/<album>/<file>.flac`, with
 * the album and song names stripped of characters matched by
 * REGEX_UNSAFEFORFILE.
 *
 * The file name starts with the cd and track numbers when they are known
 * (that is, not negative), each followed by a dot:
 *
 *   cd 1,  track 1   -> "1.1. song.flac"
 *   cd -1, track 1   -> "1. song.flac"
 *   cd -1, track -1  -> "song.flac"
 *
 * @param location folder under which the album folder is placed
 * @param song the song to build the path for
 * @returns the absolute album folder and the absolute file path
 */
function pathFor(location: string, song: PlaylistSongData): SongPath {
  const sanitize = (text: string) => text.replace(REGEX_UNSAFEFORFILE, '')

  // "<cd>.<track>. " built from whichever of the two numbers is known
  const numbering = [song.cd, song.track]
    .filter((n) => n >= 0)
    .map((n) => n + '.')
  const prefix = numbering.length > 0 ? numbering.join('') + ' ' : ''

  const pathname = path.resolve(location, sanitize(song.album))
  const fullpathname = path.resolve(
    pathname,
    `${prefix}${sanitize(song.name)}.flac`
  )

  return { pathname, fullpathname }
}
|
|
|
|
/**
 * Converts an ArrayBuffer to a Buffer<ArrayBuffer>.
 *
 * The bytes are copied, so the returned Buffer does not share memory with
 * `arrayBuffer`.
 *
 * @param arrayBuffer the bytes to copy
 * @returns a new Buffer of the same length holding the same bytes
 */
function toBuffer(arrayBuffer: ArrayBuffer): Buffer<ArrayBuffer> {
  const buffer = Buffer.alloc(arrayBuffer.byteLength)
  // one bulk copy instead of a byte-by-byte loop
  buffer.set(new Uint8Array(arrayBuffer))
  return buffer
}
|
|
|
|
// Entry point: decide what to do from the parsed flags.
if (flags.help) {
  // an explicit request for help wins, even when no url was given, so that
  // `--help` on its own does not report a missing url
  printHelp()
} else if (!flags.url) {
  console.log('Missing URL\n\n')
  printHelp()
} else {
  main().catch((e) => console.error(e))
}
|
|
|