Skip to main content

Limpiador de diccionarios

Posted in

Con este sencillo script bash de producción casera podemos limpiar nuestros diccionarios de palabras. Estas son las opciones de las que dispone:

sort & delete duplicate words & delete non-printable chars
delete the spaces
delete non-alphanumeric chars
convert all to lowercase
count words
#!/bin/bash
#############
# Dabax.net #
#############

# Positional arguments: $1 is the dictionary to read, $2 is the file that
# receives the working copy and every subsequent modification.
IN="$1"
OUT="$2"
# Scratch file used as the intermediate target of each filter step.
TMP='dicclean.TEMP'

# Print the numbered list of menu options, one per line, on stdout.
help() {
	printf '%s\n' \
		"1: sort & delete duplicated words & delete no-printable chars" \
		"2: delete the spaces" \
		"3: delete no-alfanumeric chars" \
		"4: convert all to tiny chars" \
		"5: run all before actions" \
		"6: count words" \
		"7: view help" \
		"8: exit"
}


# Both arguments are mandatory: refuse to start if either one is missing.
# (The test must be an OR; with AND a single missing argument slipped through
# and only surfaced later as a confusing cp error.)
if [[ -z "$IN" || -z "$OUT" ]]; then
	echo "Usage: ./dicclean <input-dictionary> <output-dictionary>" >&2
	exit 1
fi

# Every later action edits $OUT, so start from a copy of the input.
echo "Making temp file..."
if [[ "$IN" -ef "$OUT" ]]; then
	# Input and output are the same file: the dictionary is cleaned in place
	# and there is nothing to copy (cp would refuse to copy a file onto itself).
	:
elif ! cp -- "$IN" "$OUT"; then
	# Without a working copy none of the menu actions can do anything useful.
	echo "Error: cannot copy '$IN' to '$OUT'" >&2
	exit 1
fi

# Show the menu once before prompting.
help

# Run the given command with the working dictionary ($OUT) on stdin and the
# scratch file ($TMP) on stdout. $OUT is replaced only when the command
# succeeds, so a failing filter cannot wipe the dictionary.
_apply_filter() {
	if "$@" < "$OUT" > "$TMP"; then
		mv -- "$TMP" "$OUT" && echo "done"
	else
		echo "Error: '$1' failed; dictionary left unchanged" >&2
		rm -f -- "$TMP"
	fi
}

# stdin -> stdout: remove every space character.
_strip_spaces() { sed 's/ //g'; }

# stdin -> stdout: drop every line containing a character other than a letter
# or a digit. grep exits with 1 when no line survives; that is a valid (empty)
# result, so only exit codes above 1 are reported as failure.
_drop_non_alnum() { grep -iv '[^a-z0-9]' || [ "$?" -eq 1 ]; }

# stdin -> stdout: lowercase every line.
_to_lower() { awk '{ print tolower($0) }'; }

# stdin -> stdout: options 2, 3, 4 and 1 chained in that order.
_clean_all() { _strip_spaces | _drop_non_alnum | _to_lower | sort -uib; }

# Interactive menu: read one option per iteration until the user picks 8.
while true; do

	printf 'Enter option: '
	# Stop on end-of-input; otherwise a closed stdin would make the loop
	# print "Wrong option!" forever.
	if ! read -r OP; then
		echo
		exit 0
	fi

	case "$OP" in
	1)
		_apply_filter sort -uib
		;;
	2)
		_apply_filter _strip_spaces
		;;
	3)
		_apply_filter _drop_non_alnum
		;;
	4)
		_apply_filter _to_lower
		;;
	5)
		_apply_filter _clean_all
		;;
	6)
		wc -w < "$OUT"
		;;
	7)
		help
		;;
	8)
		echo "happy hacking ;-)"
		exit 0
		;;
	*)
		echo "Wrong option!"
		;;
	esac
done