您的位置:首页 > 运维架构 > Shell

UNIX_shell_programming(几道练习题目)

2013-10-10 09:22 344 查看

Word Frequency

Given n files as command line arguments, calculate the frequency of words for each file, and display the results in a table. A word is defined as a contiguous sequence of one or more letters (see the definition of letters above). Every non-letter character
is to be considered whitespace.

The output should be a multi-column list, the first column being the list of words encountered (in lowercase) in any of the input files (sorted according to the
C locale), and subsequent columns containing the number of occurrences of that word in
file1 ... filen, separated by spaces. Example output for 2 files:

a 5 8
she 3 5
the 6 3
word 2 0

#!/bin/bash

# Word frequencies
# $Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
# See http://cactus.rulez.org/elte/2005-1-unix/#9 for a description of what it does
# Licensed under the GNU General Public License, version 2

function help () {
self=`basename $0`

cat << EOF
Usage: $self FILE1 [FILE2...]
Creates statistics about the words occuring in the files.

Options:
-help   Display this help message

(C) 2005 Dr. ERDI Gergo <cactus@cactus.rulez.org>

Version: \$Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
EOF
exit 0
}

function error () {
echo ERROR: $@! >&2
exit 1
}

# Ez mashogy mukodik mint a tobbi: itt csak vegignezzuk az opciokat,
# hogy van-e koztuk -help

function options () {
[ -z "$1" ] && return
case "$1" in
-help)
help
;;
*)
[ -f "$1" -a -r "$1" ] || error "$1: Unable to open file"
shift
options "$@"
;;
esac
}

function awk_count () {
AWKPROG='
BEGIN {
FS="[^a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]"

ekezet_lower["Á"] = "á";
ekezet_lower["É"] = "é";
ekezet_lower["Í"] = "í";
ekezet_lower["Ó"] = "ó";
ekezet_lower["Ö"] = "ö";
ekezet_lower["Õ"] = "õ";
ekezet_lower["Ú"] = "ú";
ekezet_lower["Ü"] = "ü";
ekezet_lower["Û"] = "û";
}

function iso88592_tolower (s) {
ret=""

for (j = 1; j <= length(s); j++)
ret = ret iso88592_tolower_c(substr(s, j, 1));
return ret;
}

function iso88592_tolower_c (c) {
if (match (c, "[a-zA-Z]"))
return tolower (c);
else if (c in ekezet_lower)
return ekezet_lower[c];

return c;
}

/[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]/ {
for (i = 1; i != NF + 1; i++)
{
if (match ($i, "^[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]+$"))
{
words[iso88592_tolower($i)]++;
}
}
}

END {
for (i in words)
printf "%s %s\n", i, words[i];
}
'
echo "awk '$AWKPROG' '$1' 2>/dev/null|sort"
}

# Ez elemenkenti feldolgozas, oriasi Fothi power :)
function awk_join () {
AWKPROG='
function read1 () {
if (!eof1) {
eof1 = (getline <= 0);
if (eof1)
return;

key1 = $1;
val1 = $2;

width = NF - 1;
for (i = 3; i <= NF; ++i)
val1 = sprintf ("%s %s", val1, $i);
}
}

function read2 () {
if (!eof2) {
eof2 = ((getline < f2) <= 0);
if (eof2)
return;

key2 = $1;
val2 = $2;
}
}

function join () {
printf "%s %s %s\n", key1, val1, val2;
}

function fill_from_1 () {
printf "%s %s 0\n", key1, val1, "0";
}

function fill_from_2 () {
printf "%s ", key2;
for (i = 0; i < width; ++i)
printf "0 ";
printf "%s\n", val2;
}

BEGIN {
FS=" ";

read1();
read2();

while (!eof1 || !eof2) {
if (key1 == key2) {
join();
read1();
read2();
} else if (eof2 || (!eof1 && (key1 < key2))) {
fill_from_1();
read1();
} else {
fill_from_2();
read2();
}
}
exit
}
'
echo "awk -v f2=<($1) '$AWKPROG' 2>/dev/null"
}

[ $# -lt 1 ] && error "Missing arguments"
options "$@"

fullpipe="$(awk_count $1)"
shift

for i in "$@"
do
counter="`awk_count $i`"
awkline="`awk_join "$counter"`"
fullpipe="$fullpipe | $awkline"
done

export LANG=
export LC_ALL=
export LC_CTYPE=
eval "$fullpipe"

转载自:http://gergo.erdi.hu/elte/2005-1-unix/
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: