Read CSV
Sometimes a CSV file will use quotes to wrap data (in case that data contains a comma that does not signify a new field). Since it's a common thing to do, a script that reads a CSV should anticipate and deal with that eventuality. The main workhorse of this script is the parseCSVLine function. By passing a CSV line to it, it will parse the fields into an array.
/**
 * Splits one line of CSV text into its fields.
 *
 * A field may be wrapped in double quotes, in which case commas inside it are
 * kept as data and a doubled quote ("") stands for one literal quote.
 * Unquoted fields are taken verbatim up to the next comma. Empty fields,
 * including one that follows a trailing comma, are returned as empty strings.
 *
 * @param line         the raw CSV line; null or an empty string yields an empty array
 * @param index        row number supplied by the caller; not used by the parser
 * @param columnsToGet positive maximum number of fields to return; parsing stops
 *                     as soon as this many fields have been collected
 * @return the parsed fields, in order of appearance
 */
String[] parseCSVLine(String line, int index, int columnsToGet){
    // Parser states.
    int START_STATE = 0;            // at the beginning of a field
    int FIRST_QUOTE = 1;            // just consumed the opening quote of a field
    int SECOND_QUOTE = 2;           // saw a quote inside a quoted field: closing quote or first half of ""
    int IN_WORD = 3;                // inside the text of a quoted field
    int IN_WORD_WITHOUT_QUOTES = 4; // inside the text of an unquoted field

    if (line == null){
        return new String[0];
    }

    int state = START_STATE;
    StringBuilder word = new StringBuilder();
    ArrayList fields = new ArrayList();
    char[] chars = line.toCharArray();
    for (int i = 0; i < chars.length; i++){
        char c = chars[i];
        if (c == '"'){
            if (state == START_STATE){
                state = FIRST_QUOTE;
            }
            else if ((state == FIRST_QUOTE) || (state == IN_WORD)){
                // Either the closing quote or the first half of an escaped quote;
                // the next character decides which.
                state = SECOND_QUOTE;
            }
            else if (state == SECOND_QUOTE){
                // Two quotes in a row inside a quoted field: one literal quote.
                word.append(c);
                state = IN_WORD;
            }
            else{
                // A quote in the middle of an unquoted field is kept as plain text.
                word.append(c);
            }
        }
        else if (c == ','){
            if ((state == FIRST_QUOTE) || (state == IN_WORD)){
                // Comma inside an open quoted field is data, not a separator.
                word.append(c);
                state = IN_WORD;
            }
            else{
                // Separator: the current field (possibly empty) is complete.
                fields.add(word.toString());
                if (fields.size() == columnsToGet) break;
                word.setLength(0);
                state = START_STATE;
            }
        }
        else{
            word.append(c);
            if (state == START_STATE) state = IN_WORD_WITHOUT_QUOTES;
            else if (state != IN_WORD_WITHOUT_QUOTES) state = IN_WORD;
        }
    }
    // Flush the last field unless the column limit was already reached. A
    // non-empty line always ends with one pending field: either text still
    // being collected or the empty field after a trailing comma.
    if ((fields.size() < columnsToGet) && (chars.length > 0)){
        fields.add(word.toString());
    }
    String[] fieldsArray = new String[fields.size()];
    for (int i = 0; i < fields.size(); i++){
        fieldsArray[i] = (String) fields.get(i);
    }
    return fieldsArray;
}
// Reads test_input.csv row by row (skipping the header row), stores the NAME
// and ZIP columns as session variables and runs a scrape for each row.

// File from which to read.
File inputFile = new File( "test_input.csv" );
// Each data row is indexed from 0 (name) through 5 (zip), so six columns
// must be requested from the parser.
int expectedColumns = 6;
FileReader in = new FileReader( inputFile );
BufferedReader buffRead = new BufferedReader( in );
try{
    // Read the file in line-by-line.
    int index = 0;
    while( ( searchTerm = buffRead.readLine() )!=null){
        // Don't read header row
        if (index>0){
            // Parse the line into an array
            line = parseCSVLine(searchTerm, index, expectedColumns);
            if (line.length < expectedColumns){
                // A short or blank row would otherwise fail on the array reads below.
                session.log("Skipping CSV row " + index + ": expected " + expectedColumns + " columns but found " + line.length);
            }
            else{
                // Get the values
                name = line[0];
                date = line[1];
                address = line[2];
                city = line[3];
                state = line[4];
                zip = line[5];
                // Set the needed values as session variables
                session.setVariable("NAME", name);
                session.setVariable("ZIP", zip);
                // Scrape for those values
                session.scrapeFile("Serach results");
            }
        }
        index++;
    }
}
finally{
    // Close up the file even if reading or scraping failed. Closing the
    // BufferedReader also closes the FileReader it wraps.
    buffRead.close();
}
int START_STATE = 0;
int FIRST_QUOTE = 1;
int SECOND_QUOTE = 2;
int IN_WORD = 3;
int IN_WORD_WITHOUT_QUOTES = 4;
int state = START_STATE;
String word = "";
ArrayList lines = new ArrayList();
char[] chars = line.toCharArray();
for (int i = 0; i < chars.length; i++){
char c = chars[i];
if (c == '"'){
if (state == START_STATE){
state = FIRST_QUOTE;
}
else if ((state == FIRST_QUOTE) || (state == IN_WORD)){
state = SECOND_QUOTE;
}
else if (state == SECOND_QUOTE){
word += ("" + c);
state = IN_WORD;
}
}
else if (c == ','){
if ((state == SECOND_QUOTE) || (state == IN_WORD_WITHOUT_QUOTES)){
state = START_STATE;
lines.add(word);
if (lines.size() == columnsToGet) break;
word = "";
}
else if (state == START_STATE){
state = START_STATE;
lines.add(word.replaceAll("\"\"", "\""));
}
else{
word += ("" + c);
state = IN_WORD;
}
}
else{
if (state == START_STATE) state = IN_WORD_WITHOUT_QUOTES;
else if (state != IN_WORD_WITHOUT_QUOTES){
state = IN_WORD;
word += ("" + c);
}
}
}
if (lines.size() < columnsToGet){
if ((state == SECOND_QUOTE) || (state == IN_WORD_WITHOUT_QUOTES))
lines.add(word.replaceAll("\"\"", "\""));
}
String[] linesArray = new String[lines.size()];
for (int i = 0; i < lines.size(); i++){
linesArray[i] = (String) lines.get(i);
}
return linesArray;
}
// Reads test_input.csv row by row (skipping the header row), stores the NAME
// and ZIP columns as session variables and runs a scrape for each row.

// File from which to read.
File inputFile = new File( "test_input.csv" );
// Each data row is indexed from 0 (name) through 5 (zip), so six columns
// must be requested from the parser.
int expectedColumns = 6;
FileReader in = new FileReader( inputFile );
BufferedReader buffRead = new BufferedReader( in );
try{
    // Read the file in line-by-line.
    int index = 0;
    while( ( searchTerm = buffRead.readLine() )!=null){
        // Don't read header row
        if (index>0){
            // Parse the line into an array
            line = parseCSVLine(searchTerm, index, expectedColumns);
            if (line.length < expectedColumns){
                // A short or blank row would otherwise fail on the array reads below.
                session.log("Skipping CSV row " + index + ": expected " + expectedColumns + " columns but found " + line.length);
            }
            else{
                // Get the values
                name = line[0];
                date = line[1];
                address = line[2];
                city = line[3];
                state = line[4];
                zip = line[5];
                // Set the needed values as session variables
                session.setVariable("NAME", name);
                session.setVariable("ZIP", zip);
                // Scrape for those values
                session.scrapeFile("Serach results");
            }
        }
        index++;
    }
}
finally{
    // Close up the file even if reading or scraping failed. Closing the
    // BufferedReader also closes the FileReader it wraps.
    buffRead.close();
}
Alternatively, you can read the CSV via the opencsv package that is included with screen-scraper. This may be more robust for different CSV formats.
import au.com.bytecode.opencsv.CSVReader;
// Loads input/AK.csv with opencsv into a List of Maps (one Map per data row,
// keyed by the header row's column names) and logs every row.

//initialize the reader
File f = new File("input/AK.csv");
CSVReader reader = new CSVReader(new FileReader(f));
List lines = new ArrayList();
String[] headers;
String[] line;
try
{
    //read the file saving it into a List of Maps
    headers = reader.readNext();
    while((line = reader.readNext())!=null)
    {
        Map m = new HashMap();
        for(int i=0;i<headers.length;i++)
        {
            // A row may hold fewer fields than the header (e.g. a blank
            // line); missing columns are stored as empty strings instead of
            // failing with an array index error.
            m.put(headers[i], i<line.length ? line[i] : "");
        }
        lines.add(m);
    }
}
finally
{
    // Release the file even if reading failed part-way through.
    reader.close();
}
//print out what we read
for(int i=0;i<lines.size();i++)
{
    session.log(String.valueOf(lines.get(i)));
}
// Loads input/AK.csv with opencsv into a List of Maps (one Map per data row,
// keyed by the header row's column names) and logs every row.

//initialize the reader
File f = new File("input/AK.csv");
CSVReader reader = new CSVReader(new FileReader(f));
List lines = new ArrayList();
String[] headers;
String[] line;
try
{
    //read the file saving it into a List of Maps
    headers = reader.readNext();
    while((line = reader.readNext())!=null)
    {
        Map m = new HashMap();
        for(int i=0;i<headers.length;i++)
        {
            // A row may hold fewer fields than the header (e.g. a blank
            // line); missing columns are stored as empty strings instead of
            // failing with an array index error.
            m.put(headers[i], i<line.length ? line[i] : "");
        }
        lines.add(m);
    }
}
finally
{
    // Release the file even if reading failed part-way through.
    reader.close();
}
//print out what we read
for(int i=0;i<lines.size();i++)
{
    session.log(String.valueOf(lines.get(i)));
}
scraper on 07/16/2010 at 4:53 pm
- Printer-friendly version
- Login or register to post comments