#!/usr/bin/sh

# wcurl - a simple wrapper around curl to easily download files.
#
# Requires curl >= 7.46.0 (2015)
#
# Copyright (C) Samuel Henrique, Sergio Durigan
# Junior and many contributors, see the AUTHORS
# file.
#
# Permission to use, copy, modify, and distribute this software for any purpose
# with or without fee is hereby granted, provided that the above copyright
# notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of a copyright holder shall not be
# used in advertising or otherwise to promote the sale, use or other dealings in
# this Software without prior written authorization of the copyright holder.
#
# SPDX-License-Identifier: curl

# Stop on errors and on usage of unset variables.
set -eu

VERSION="2025.05.26"

PROGRAM_NAME="$(basename "$0")"
readonly PROGRAM_NAME

# Display the version.
print_version()
{
    cat << _EOF_
${VERSION}
_EOF_
}

# Display the program usage.
usage()
{
    cat << _EOF_
${PROGRAM_NAME} -- a simple wrapper around curl to easily download files.

Usage: ${PROGRAM_NAME} <URL>...
       ${PROGRAM_NAME} [--curl-options <CURL_OPTIONS>]... [--no-decode-filename] [-o|-O|--output <PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} [--curl-options=<CURL_OPTIONS>]... [--no-decode-filename] [--output=<PATH>] [--dry-run] [--] <URL>...
       ${PROGRAM_NAME} -h|--help
       ${PROGRAM_NAME} -V|--version

Options:

  --curl-options <CURL_OPTIONS>: Specify extra options to be passed when invoking curl. May be
                                 specified more than once.

  -o, -O, --output <PATH>: Use the provided output path instead of getting it from the URL. If
                           multiple URLs are provided, resulting files share the same name with a
                           number appended to the end (curl >= 7.83.0). If this option is provided
                           multiple times, only the last value is considered.

  --no-decode-filename: Don't percent-decode the output filename, even if the percent-encoding in
                        the URL was done by wcurl, e.g.: The URL contained whitespaces.

  --dry-run: Don't actually execute curl, just print what would be invoked.

  -V, --version: Print version information.

  -h, --help: Print this usage message.

  <CURL_OPTIONS>: Any option supported by curl can be set here. This is not used by wcurl; it is
                 instead forwarded to the curl invocation.

  <URL>: URL to be downloaded. Anything that is not a parameter is considered
         an URL. Whitespaces are percent-encoded and the URL is passed to curl, which
         then performs the parsing. May be specified more than once.
_EOF_
}

# Display an error message on stderr and bail out with exit status 1.
error()
{
    # Use the shell's own fd duplication instead of /dev/stderr, which is not
    # guaranteed to exist (or to be writable) on every system.
    printf "%s\n" "$*" >&2
    exit 1
}

# Extra curl options provided by the user.
# This is set per-URL for every URL provided.
# Some options are global, but we are erroring on the side of needlessly setting
# them multiple times instead of causing issues with parameters that need to
# be set per-URL.
CURL_OPTIONS=""

# The URLs to be downloaded.
URLS=""

# Variable used to be set to the percent-decoded filename parsed from the URL, unless
# --output or --no-decode-filename are used.
OUTPUT_PATH=""
HAS_USER_SET_OUTPUT="false"

# The parameters that are passed per-URL to curl.
readonly PER_URL_PARAMETERS="\
    --fail \
    --globoff \
    --location \
    --proto-default https \
    --remote-time \
    --retry 5 "

# Whether to invoke curl or not.
DRY_RUN="false"

# Sanitize parameters: require at least one URL, then freeze the settings
# collected by the argument parser.
sanitize()
{
    if [ -z "${URLS}" ]; then
        error "You must provide at least one URL to download."
    fi

    readonly CURL_OPTIONS URLS DRY_RUN HAS_USER_SET_OUTPUT
}

# Indicate via exit code whether the string given in the first parameter
# consists solely of characters from the string given in the second parameter.
# In other words, it returns 0 if the first parameter only contains characters
# from the second parameter, e.g.: Are $1 characters a subset of $2 characters?
# An empty first parameter is not considered a subset.
is_subset_of()
{
    # ${2} is intentionally unquoted: it forms the bracket expression.
    case "${1}" in
        *[!${2}]*|'') return 1;;
    esac
}

# Print the given string percent-decoded.
# Reads the global DECODE_FILENAME; nothing is decoded unless it is "true".
percent_decode()
{
    # Encodings of control characters (00-1F) are passed through without decoding.
    # Iterate on the input character-by-character, decoding it.
    printf "%s\n" "${1}" | fold -w1 | while IFS= read -r decode_out; do
        # If character is a "%", read the next character as decode_hex1.
        if [ "${decode_out}" = % ] && IFS= read -r decode_hex1; then
            decode_out="${decode_out}${decode_hex1}"
            # If there's one more character, read it as decode_hex2.
            if IFS= read -r decode_hex2; then
                decode_out="${decode_out}${decode_hex2}"
                # Skip decoding if DECODE_FILENAME is not "true".
                # Skip decoding if this is a control character (00-1F).
                if [ "${DECODE_FILENAME}" = "true" ] && \
                    is_subset_of "${decode_hex1}" "23456789abcdefABCDEF" && \
                    is_subset_of "${decode_hex2}" "0123456789abcdefABCDEF"; then
                    case "${decode_hex1}${decode_hex2}" in
                        # Keep "/" (2F) and "\" (5C) encoded: the result is used
                        # as an output filename, and decoding a path separator
                        # would let a URL write outside the current directory.
                        2[fF]|5[cC]) ;;
                        # Use printf to decode it into octal and then decode it to the final format.
                        *) decode_out="$(printf "%b" "\\$(printf %o "0x${decode_hex1}${decode_hex2}")")";;
                    esac
                fi
            fi
        fi
        printf %s "${decode_out}"
    done
}

# Print the percent-decoded filename portion of the given URL.
# Prints nothing when the URL has no path component.
get_url_filename()
{
    # Remove protocol and query string if present.
    hostname_and_path="$(printf %s "${1}" | sed -e 's,^[^/]*//,,' -e 's,?.*$,,')"
    # If what remains contains a slash, there's a path; return it percent-decoded.
    case "${hostname_and_path}" in
        # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something"
        */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')";;
    esac
    # No slash means there was just a hostname and no path; return empty string.
}

# Execute curl with the list of URLs provided by the user.
# With DRY_RUN set to "true", print the arguments one per line instead.
exec_curl()
{
    CMD="curl "

    # Store version to check if it supports --no-clobber and --parallel.
    curl_version=$($CMD --version | cut -f2 -d' ' | head -n1)
    curl_version_major=$(printf "%s\n" "$curl_version" | cut -f1 -d.)
    curl_version_minor=$(printf "%s\n" "$curl_version" | cut -f2 -d.)

    CURL_HAS_NO_CLOBBER=""
    CURL_HAS_PARALLEL=""
    # --no-clobber is only supported since 7.83.0.
    # --parallel is only supported since 7.66.0.
    if [ "${curl_version_major}" -ge 8 ]; then
        CURL_HAS_NO_CLOBBER="--no-clobber"
        CURL_HAS_PARALLEL="--parallel"
    elif [ "${curl_version_major}" -eq 7 ]; then
        if [ "${curl_version_minor}" -ge 83 ]; then
            CURL_HAS_NO_CLOBBER="--no-clobber"
        fi
        if [ "${curl_version_minor}" -ge 66 ]; then
            CURL_HAS_PARALLEL="--parallel"
        fi
    fi

    # Detecting whether we need --parallel. It's easier to rely on
    # the shell's argument parsing.
    # shellcheck disable=SC2086
    set -- $URLS

    if [ "$#" -gt 1 ]; then
        CURL_PARALLEL="$CURL_HAS_PARALLEL"
    else
        CURL_PARALLEL=""
    fi

    # Start assembling the command.
    #
    # We use 'set --' here (again) because (a) we don't have arrays on
    # POSIX shell, and (b) we need better control over the way we
    # split arguments.
    #
    # shellcheck disable=SC2086
    set -- ${CMD} ${CURL_PARALLEL}

    NEXT_PARAMETER=""
    for url in ${URLS}; do
        # If the user did not provide an output path, define one.
        if [ "${HAS_USER_SET_OUTPUT}" = "false" ]; then
            OUTPUT_PATH="$(get_url_filename "${url}")"
            # If we could not get a path from the URL, use the default: index.html.
            [ -z "${OUTPUT_PATH}" ] && OUTPUT_PATH=index.html
        fi
        # shellcheck disable=SC2086
        set -- "$@" ${NEXT_PARAMETER} ${PER_URL_PARAMETERS} ${CURL_HAS_NO_CLOBBER} ${CURL_OPTIONS} --output "${OUTPUT_PATH}" "${url}"
        NEXT_PARAMETER="--next"
    done

    if [ "${DRY_RUN}" = "false" ]; then
        exec "$@"
    else
        printf "%s\n" "$@"
    fi
}

# Default to decoding the output filename
DECODE_FILENAME="true"

# Use "${1-}" in order to avoid errors because of 'set -u'.
while [ -n "${1-}" ]; do
    case "${1}" in
        --curl-options=*)
            CURL_OPTIONS="${CURL_OPTIONS} ${1#--curl-options=}"
            ;;

        --curl-options)
            # Fail with a clear message instead of tripping 'set -u' on ${1}.
            [ "$#" -ge 2 ] || error "Option '${1}' requires an argument."
            shift
            CURL_OPTIONS="${CURL_OPTIONS} ${1}"
            ;;

        --dry-run)
            DRY_RUN="true"
            ;;

        --output=*)
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${1#--output=}"
            ;;

        -o|-O|--output)
            # Fail with a clear message instead of tripping 'set -u' on ${1}.
            [ "$#" -ge 2 ] || error "Option '${1}' requires an argument."
            shift
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${1}"
            ;;

        -o*|-O*)
            HAS_USER_SET_OUTPUT="true"
            OUTPUT_PATH="${1#-[oO]}"
            ;;

        --no-decode-filename)
            DECODE_FILENAME="false"
            ;;

        -h|--help)
            usage
            exit 0
            ;;

        -V|--version)
            print_version
            exit 0
            ;;

        --)
            # This is the start of the list of URLs.
            shift
            for url in "$@"; do
                # Encode whitespaces into %20, since wget supports those URLs.
                newurl=$(printf "%s\n" "${url}" | sed 's/ /%20/g')
                URLS="${URLS} ${newurl}"
            done
            break
            ;;

        -*)
            error "Unknown option: '$1'."
            ;;

        *)
            # This must be a URL.
            # Encode whitespaces into %20, since wget supports those URLs.
            newurl=$(printf "%s\n" "${1}" | sed 's/ /%20/g')
            URLS="${URLS} ${newurl}"
            ;;
    esac
    shift
done

sanitize
exec_curl