CSV Parser

CSV Parser

Parse an array of strings, where each string is one CSV record, and output each record with its fields separated by |.

Example:

  • Input:
    John,Smith,john.smith@gmail.com,Los Angeles,1
    Jane,Roberts,janer@msn.com,"San Francisco, CA",0
    "Alexandra ""Alex""",Menendez,alex.menendez@gmail.com,Miami,1
    """Alexandra Alex"""
  • Output:
    John|Smith|john.smith@gmail.com|Los Angeles|1
    Jane|Roberts|janer@msn.com|San Francisco, CA|0
    Alexandra "Alex"|Menendez|alex.menendez@gmail.com|Miami|1
    "Alexandra Alex"

test

Solution

  1. Replace every comma that separates two fields with |.
  2. If a comma is inside a quoted field, do not treat it as a separator. Remove the surrounding quotes and print the entire field, e.g. "San Francisco, CA" => San Francisco, CA
  3. Inside a quoted field, a doubled quote ("") stands for one literal quote, so remove one of the pair, e.g. "Alexandra ""Alex""" => Alexandra "Alex". Note that """Alexandra Alex""" becomes "Alexandra Alex" because we first remove the outermost pair of quotes, and then collapse each remaining doubled quote into a single quote.
C++ implementation:
#include <iostream>
#include <vector>
using namespace std;
/**
 * Writes every string of `v` to standard output, one per line, followed by
 * a single blank line that separates this dump from whatever is printed next.
 *
 * The vector is only read, so it is taken by const reference; this also lets
 * callers pass const vectors and temporaries.
 *
 * @param v strings to print, in order.
 */
void printVS(const std::vector<std::string>& v){
    // '\n' instead of std::endl: no need to flush the stream after every line.
    for (const auto& s : v) std::cout << s << '\n';
    // Trailing blank line; std::endl flushes once so the whole dump is visible
    // as soon as the call returns.
    std::cout << std::endl;
}
/**
 * Converts CSV records into '|'-delimited records.
 *
 * Every element of `inputs` is treated as one complete CSV record and is
 * rewritten according to these rules:
 *   - a comma outside quotes is a field separator and becomes '|';
 *   - a double quote outside quotes opens a quoted section and is dropped;
 *   - inside a quoted section, commas are copied verbatim, a doubled quote
 *     ("") yields one literal quote, and a single quote closes the section
 *     and is dropped.
 *
 * Lenient on malformed input: a character that directly follows a closing
 * quote is processed like any other unquoted character (it is neither dropped
 * nor replaced by a separator), and an unterminated quoted section simply
 * runs to the end of the record.
 *
 * @param inputs CSV records, one per element.
 * @return one converted record per input element, in the same order.
 */
std::vector<std::string> parseCSV(const std::vector<std::string>& inputs){
    std::vector<std::string> v;
    v.reserve(inputs.size());
    for (const std::string& s : inputs) {
        // Build the converted record directly inside the result vector.
        v.emplace_back();
        std::string& t = v.back();
        t.reserve(s.size());  // the output is never longer than the input
        bool inQuote = false;
        for (std::string::size_type j = 0; j < s.size(); ++j) {
            const char c = s[j];
            if (c != '"') {
                // Only an unquoted comma separates fields.
                t += (c == ',' && !inQuote) ? '|' : c;
            } else if (!inQuote) {
                inQuote = true;   // opening quote: not part of the field
            } else if (j + 1 < s.size() && s[j + 1] == '"') {
                t += '"';         // escaped quote ("") -> one literal quote
                ++j;              // skip the second quote of the pair
            } else {
                // Closing quote: just leave quoted mode. The following
                // character (normally a comma) is handled by the next
                // iteration, so nothing is consumed blindly here.
                inQuote = false;
            }
        }
    }
    return v;
}
// Demo driver: echoes a fixed set of sample CSV records, converts them with
// parseCSV, and prints the converted records.
int main(int argc, char *argv[])
{
    // Command-line arguments are not used by this demo.
    (void)argc;
    (void)argv;

    // Samples cover: plain fields, a quoted field containing a comma,
    // escaped quotes inside a quoted field, and fields wrapped in three
    // and in five consecutive quotes.
    std::vector<std::string> samples = {
        "John,Smith,john.smith@gmail.com,Los Angeles,1",
        "Jane,Roberts,janer@msn.com,\"San Francisco, CA\",0",
        "\"Alexandra \"\"Alex\"\"\",Menendez,alex.menendez@gmail.com,Miami,1",
        "\"\"\"Alexandra Alex\"\"\"",
        "\"\"\"\"\"Alexandra Alex\"\"\"\"\""
    };

    // Show the raw records first, then their converted form.
    printVS(samples);
    std::vector<std::string> converted = parseCSV(samples);
    printVS(converted);

    return 0;
}

New ideas and practical tricks are always welcome; just leave them in the comments!