#!/bin/bash

#SBATCH --job-name=toe
#SBATCH --account=nn9106k
#SBATCH --time=72:00:00
#SBATCH --mail-type=FAIL
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=6
#SBATCH --mem-per-cpu=2048M

#
# toe: push a list of text files through a chain of LAP driver invocations
# (import, tokenizer, repp, bn, export) and write one RDF (.ttl) file each.
#
# Usage: toe LIST
#   LIST is a file of whitespace-separated names.  For each NAME, input is
#   read from <directory of LIST>/txt/NAME.txt and the result is written to
#   <directory of LIST>/rdf/NAME.ttl.
#
# Environment:
#   LAPTREE  root of the LAP tree; defaults to the path assigned below
#   LAPTMP   directory for intermediate receipts; derived from SCRATCH when
#            running as a SLURM job
#
# Exit status: 1 when LIST is missing or unreadable.  Failures of individual
# processing steps are not fatal; the loop moves on to the next name.
#

# When running as a SLURM job, source the cluster job setup and keep all
# temporary files below ${SCRATCH}/tmp.
if [[ -n "${SLURM_JOB_ID}" ]]; then
  . /cluster/bin/jobsetup
  if [[ -n "${SCRATCH}" ]]; then
    export LAPTMP="${SCRATCH}/tmp"
    # deliberately best effort (errors discarded), as in the original
    mkdir -p "${LAPTMP}" > /dev/null 2>&1
    export TMPDIR="${LAPTMP}"
  fi
fi

if [[ -z "${1}" ]]; then
  echo "toe: missing input file argument; exit." >&2
  exit 1
fi
list=${1}
if [[ ! -r "${list}" ]]; then
  echo "toe: cannot read input file ‘${list}’; exit." >&2
  exit 1
fi

if [[ -z "${LAPTREE}" ]]; then
  LAPTREE=/projects/lap/development/trunk/tree
fi
. "${LAPTREE}/etc/library.bash"

# Nothing above sets LAPTMP outside SLURM or without SCRATCH; unless
# library.bash provided a value (not visible from here), the receipt paths
# would otherwise resolve to the filesystem root.
if [[ -z "${LAPTMP}" ]]; then
  LAPTMP=${TMPDIR:-/tmp}
fi

base=$(dirname -- "${list}")

# Collect the names up front: entries are split on whitespace, exactly as
# before, but are no longer subject to filename expansion.  Reading into an
# array first also leaves stdin untouched for the tools run in the loop.
names=()
while read -r -a words || (( ${#words[@]} > 0 )); do
  names+=("${words[@]}")
done < "${list}"

# Loop-invariant paths.  The receipt names are reused for every input and
# purged again at the end of each iteration.
# NOTE(review): LAPLIBRARY is not set in this script; presumably it comes
# from library.bash -- confirm.
driver="${LAPTREE}/etc/driver"
store="${LAPLIBRARY}/bin/store"
media="${LAPTMP}/media.${USER}.${$}.rpt"
tokenizer="${LAPTMP}/tokenizer.${USER}.${$}.rpt"
repp="${LAPTMP}/repp.${USER}.${$}.rpt"
bn="${LAPTMP}/bn.${USER}.${$}.rpt"

for name in "${names[@]}"; do
  input="${base}/txt/${name}.txt"
  output="${base}/rdf/${name}.ttl"
  if [[ ! -f "${input}" ]]; then
    echo "toe: skipping invalid file ‘${input}’; continue." >&2
    continue
  fi
  echo "toe: processing ‘${input}’ ..."

  # Each step consumes the receipt written by the previous one.
  "${driver}" ==tool import ==process text \
    "${input}" "${media}"
  "${driver}" ==tool tokenizer \
    "${media}" "${tokenizer}"
  "${driver}" ==tool repp \
    --segmenter tokenizer --style repp \
    "${tokenizer}" "${repp}"
  "${driver}" ==tool bn \
    --segmenter tokenizer --tokenizer repp \
    "${repp}" "${bn}"
  "${driver}" ==tool export ==process rdf \
    "${bn}" "${output}"

  #
  # in case an intermediate processing step failed, try deleting all receipts
  #
  "${store}" --purge "${bn}"
  "${store}" --purge "${repp}"
  "${store}" --purge "${tokenizer}"
  "${store}" --purge "${media}"

  printf 'output: '
  /bin/ls -l -- "${output}"
done