Como o IFS do bash não oferece uma maneira nativa de tratar caracteres delimitadores consecutivos como um único delimitador (para delimitadores que não sejam espaços em branco), montei uma versão em bash puro (em vez de usar uma chamada externa, p. ex. tr, awk, sed).
Ela aceita um IFS com vários caracteres.
Aqui estão os resultados de tempo de execução, juntamente com testes semelhantes para as opções tr e awk mostradas nesta página de perguntas e respostas. Os testes são baseados em 10000 iterações que apenas constroem o array (sem E/S):
pure bash 3.174s (28 char IFS)
call (awk) 0m32.210s (1 char IFS)
call (tr) 0m32.178s (1 char IFS)
Aqui está a saída
# dlm_str = :.~!@#$%^&()_+-=`}{][ ";></,
# original = :abc:.. def:.~!@#$%^&()_+-=`}{][ ";></,'single*quote?'..123:
# unified = :abc::::def::::::::::::::::::::::::::::'single*quote?'::123:
# max-w 2^ = ::::::::::::::::
# shrunk.. = :abc:def:'single*quote?':123:
# arr[0] "abc"
# arr[1] "def"
# arr[2] "'single*quote?'"
# arr[3] "123"
Aqui está o script
#!/bin/bash
# Note: shrink_repeat_chars modifies the source string in place,
# so work with a copy if you need the original.
# also: the string must live in the global variable varG; the function
# reads and rewrites that exact name.
#
# NOTE: Delimiters are matched literally, so glob/regex metacharacters
#       ( * ? [ ] \ . & ) and quote characters are safe both in the
#       delimiter set and in the data.
# NOTE: Digits in the delimiter set are fine for this function, but a caller
#       that keeps such an IFS active must quote later expansions
#       (e.g. "${!arr[@]}"), otherwise numeric array indices are
#       word-split as well.
#
function shrink_repeat_chars () # A 'tr -s' analog
{
  # Collapse every run of delimiter characters in $varG to ONE occurrence of
  # the first delimiter character.
  #
  # Globals:
  #   varG  (read and written) the string to be "shrunk".
  # Arguments:
  #   $1    the set of delimiter characters; any mix of them, repeated
  #         consecutively, counts as one shrinkable group
  #         (e.g. " " for runs of blanks).
  # Returns:
  #   0; the result is left in varG. An empty $1 leaves varG untouched.
  local dlms="$1"   # delimiter set
  local dlm1        # 1st delimiter char: the one every run is reduced to
  local dlmw        # work delimiter: a run of dlm1 whose length is a power of 2
  local ch
  local -i ix

  # Without a delimiter there is nothing to shrink; an empty work delimiter
  # would also never let the width-doubling loop below terminate.
  [[ -n "$dlms" ]] || return 0

  dlm1=${dlms:0:1}
  dlmw=$dlm1

  # More than one delimiter char
  # ============================
  # Rewrite every other delimiter char in varG as the 1st one, so only runs
  # of a single character remain. Pattern and replacement are quoted so they
  # are taken literally (no globbing, no '&' back-reference).
  for ((ix = 1; ix < ${#dlms}; ix++)); do
    ch=${dlms:ix:1}
    varG=${varG//"$ch"/"$dlm1"}
  done

  # Binary shrink
  # =============
  # Double the work delimiter until varG no longer contains two copies of it
  # back to back. Afterwards the longest run in varG is shorter than twice
  # the work delimiter.
  while [[ "$varG" == *"$dlmw$dlmw"* ]]; do
    dlmw=$dlmw$dlmw
  done

  # Replace "work delimiter + one char" by a single char, halving the work
  # delimiter each round. Every round leaves all runs no longer than the
  # current work delimiter, so the last pass (2 chars -> 1) finishes the job.
  while [[ "$dlmw" != "$dlm1" ]]; do
    varG=${varG//"$dlmw$dlm1"/"$dlm1"}
    dlmw=${dlmw:${#dlmw}/2}
  done
  varG=${varG//"$dlm1$dlm1"/"$dlm1"}
}
# Main
# Demo driver: split a sample string on a 28-character delimiter set, treating
# every run of delimiters as one separator, then dump the resulting array.
#
# The 17th delimiter is a backtick. A single quote at that position would end
# the single-quoted literals below early, and would also strip the quotes
# from 'single*quote?' in the data.
varG=':abc:.. def:.~!@#$%^&()_+-=`}{][ ";></,'\''single*quote?'\''..123:'
sfi="$IFS"; IFS=':.~!@#$%^&()_+-=`}{][ ";></,' # save original IFS and set new multi-char IFS
set -f                      # disable globbing: '*' and '?' in the data must stay literal
shrink_repeat_chars "$IFS"  # The source string name must be $varG
# After the shrink every separator is the first IFS char. Strip one leading
# separator, if present, so splitting does not produce an empty first element.
# A single trailing separator is fine: word splitting treats a non-whitespace
# IFS char as a field terminator, so it adds no empty last field.
arr=(${varG#"${IFS:0:1}"})  # intentionally unquoted: this is the split
set +f                      # re-enable globbing
IFS="$sfi"                  # re-instate the original IFS
# Dump the array. The index list is quoted so it is never word-split.
for ix in "${!arr[@]}"; do
  printf '# arr[%s] "%s"\n' "$ix" "${arr[ix]}"
done
#
exit