Here’s a useful class that reads a CSV file and returns each row of values. It’s based on some regular expressions I found on the net (unfortunately the URL got lost, but it’s a fairly straightforward one to find). It uses a callback function, which makes it a convenient way to read the input CSV file.
// create the object with the input path
def csv = new CsvFileReader(inputFilePath)
// pass a closure that handles each row of data;
// every row arrives as a map of column name (defined by header row) to value
csv.forEachRow { row ->
    print(row['ColumnName'])
}
Here’s the entire code…
/**
*
* Class that consumes a CSV file, including double quotes, and then
* returns each row of values iteratively as a map
*
* Usage:
*
* // create the object with the input path
* def csv = new CsvFileReader(inputFilePath)
*
* // define a closure that handles each row of data
* // returned as a map of column name (defined by header row)
* csv.forEachRow () {
* print (it['ColumnName']) }
*
* User: Sanjay Mysoremutt
* Last Updated: Mar 11, 2010
**/
import java.text.SimpleDateFormat
/**
 * Reads a CSV file line by line and passes every data row to a callback as a
 * map keyed by the column names found on the first (header) line.
 *
 * Commas inside double-quoted fields are not treated as separators, and all
 * double-quote characters are stripped from the resulting values.
 */
class CsvFileReader {

    /** Path of the CSV file; handed to {@code new File(...)} when reading. */
    def fileName

    /** Dash-separated date-time pattern. Not consulted by forEachRow; kept for callers. */
    def dateFormat = /\d{1,2}-\d{1,2}-\d{2,4} \d{1,2}:\d{1,2}(:\d{1,2}){0,1}/

    //def dateTimeFormat = /\d{1,2}-\d{1,2}-\d{2,4} \d{1,2}:\d{1,2}(:\d{1,2}){0,1}/
    /** A value that fully matches this pattern is converted with {@link #df}. */
    def dateTimeFormat = /\d{1,2}\/\d{1,2}\/\d{2,4} \d{1,2}:\d{1,2}(:\d{1,2}){0,1}/

    /**
     * A value that fully matches this pattern is converted to a BigDecimal
     * (after dropping the leading dollar sign). The decimal point is escaped so
     * that only a literal '.' is accepted; values such as "$1,234" therefore
     * stay strings instead of failing inside the BigDecimal constructor.
     */
    def currencyFormat = /\$(\d)*(\.){0,1}(\d)+/

    /** Maps the empty string to null and returns anything else unchanged. */
    def nullIfEmpty = { instr ->
        instr == '' ? null : instr
    }

    /**
     * Parser applied to values matching {@link #dateTimeFormat}.
     * NOTE: the default pattern contains date fields only, so the time-of-day
     * part of a matched value is not carried into the resulting Date, and a
     * two-digit year is read literally by the 'yyyy' pattern. Replace this
     * property if those parts matter.
     */
    def df = new SimpleDateFormat('MM/dd/yyyy') //('dd-MM-yyyy')

    /**
     * @param fileName path of the CSV file to read
     */
    CsvFileReader(def fileName) {
        this.fileName = fileName
    }

    /**
     * Reads the file and invokes the callback once per data row.
     *
     * The first line supplies the column names and is not passed to the
     * callback. For every following line a new map of column name to value is
     * built: date-time values become Date objects, currency values become
     * BigDecimal, empty strings become null and columns missing from a short
     * line are null. Any other value is passed through as a String.
     *
     * @param forEachRowCallBack closure called with the map for each data row
     */
    def forEachRow(forEachRowCallBack) {
        def header = null
        new File(fileName).eachLine { line ->
            // Split on commas that are followed by an even number of double
            // quotes (i.e. commas outside quoted fields), then strip the quotes.
            def items = line.split(/,(?=([^"]*"[^"]*")*(?![^"]*"))/)*.replaceAll(/"/, "")
            if (header == null) {
                // the very first line only defines the column names
                header = items
            }
            else {
                // A fresh map per row, so rows kept by the callback are not
                // overwritten when the next line is read.
                def values = [ : ]
                header.eachWithIndex { name, position ->
                    def raw = position < items.size() ? items[position] : null
                    if (raw ==~ dateTimeFormat) {
                        values[name] = df.parse(raw)
                    }
                    else if (raw ==~ currencyFormat) {
                        // drop the leading '$' before converting
                        values[name] = new BigDecimal(raw.substring(1))
                    }
                    else {
                        values[name] = nullIfEmpty(raw)
                    }
                }
                forEachRowCallBack(values)
            }
        }
    }
}