#! /bin/sh
# $Id: cvs_Lu.sh,v 1.11 2020/05/14 12:59:06 hako Exp $
#
# cvs_Lu.sh is a filter for CVS.
#
# Convert the line breaks of *.csv files to LF.
# Preserve the UTF-8 BOM.
# Delete the UTF-8 BOM from ASCII csv files.
#
# by Hiroshi Hakoyama
#
# Background:
#   CVS needs LF, but Excel provides CRLF for *.csv files.
#   Excel needs the UTF-8 BOM to read the UTF-8 encoding and provides the BOM
#   in a UTF-8 csv.
#   The BOM does not cause trouble for the read.csv() function in R.
#   Excel can read a csv file with LF.
#   Therefore, before committing UTF-8 csv files to the CVS server, we should
#   convert the line breaks to LF and preserve the UTF-8 BOM.
#
# Solution:
#   A CVS filter to change line breaks to LF for *.csv files.
#
# Usage:
#   Add an alias to tcsh
#     alias cvs 'cvs_Lu.sh'
#
# Notes:
#   - Conversion is triggered only when the FIRST argument is literally
#     "commit" or "import". Invocations that put a global option first
#     (e.g. "cvs -q commit") are passed straight through to cvs without
#     any conversion.
#   - The exit status of this script is the exit status of cvs.

# Succeed (status 0) when $1 names a cvs sub-command that sends files to the
# repository and therefore requires the csv files to be normalized first.
is_checkin_command() {
  case $1 in
    commit | import) return 0 ;;
    *) return 1 ;;
  esac
}

# Normalize a single csv file in place according to the encoding nkf guesses.
#   $1 - path of the csv file
# UTF-8     -> LF line breaks, written back as UTF-8 with BOM
# ASCII     -> LF line breaks (nkf is run without an output encoding option)
# Shift_JIS -> LF line breaks, kept as Shift_JIS
# Any other guessed encoding leaves the file untouched.
# The body runs in a subshell "( ... )" so its variable stays private.
convert_csv_file() (
  guessed=$(nkf -g "$1") || exit 1
  # Compare the encoding exactly rather than grepping for a substring, so a
  # file *named* e.g. "UTF-8.csv" is not mistaken for a UTF-8 file. The
  # expansion drops a leading "path: " prefix in case nkf prints one.
  guessed=${guessed##*: }
  case $guessed in
    UTF-8) nkf --oc=UTF-8-BOM -Lu --in-place "$1" ;;
    ASCII) nkf -Lu --in-place "$1" ;;
    Shift_JIS) nkf --oc=Shift_JIS -Lu --in-place "$1" ;;
  esac
)

# Normalize every regular file named *.csv below the current directory.
# Only names that really end in ".csv" are selected, and the tree is walked
# once. Paths are read line by line, so file names containing a newline are
# not supported.
normalize_csv_files() {
  find . -type f -name '*.csv' -print |
    while IFS= read -r csv_path; do
      # Detach stdin so nkf can never consume the list of remaining paths.
      convert_csv_file "$csv_path" </dev/null
    done
}

if is_checkin_command "${1-}"; then
  normalize_csv_files
fi

# cvs is deliberately the last command: its exit status becomes the exit
# status of this wrapper, so a failed commit is reported as a failure.
cvs "$@"