Permalink
Browse files

Supporting different types of newlines

Only \n (Unix) newlines were supported. Files that use \r (nominally old
Macintosh) newlines are widespread, and parsing failed on the first
government-generated CSV file that I tried. Also common are \r\n (Windows)
newlines. This change determines which type of newline is most common in
the file and splits lines on that one by default.
  • Loading branch information...
1 parent f41cb00 commit 4c6a95ccc0d25b4eea94d4f4e4b537e3cb1b2f47 @waldoj waldoj committed Jan 1, 2013
Showing with 20 additions and 2 deletions.
  1. +20 −2 class.csv-to-api.php
View
@@ -47,6 +47,7 @@ function parse() {
// Create an instance of the parser for the requested file format (e.g. CSV)
$parser = 'parse_' . $this->source_format;
+
if ( !method_exists( $this, $parser ) ) {
header( '400 Bad Request' );
die( 'Format not supported' );
@@ -156,8 +157,25 @@ function xml_entities( $string ) {
* Turn CSV into a PHP array.
*/
function parse_csv( $csv ) {
-
- $lines = explode( "\n", $csv );
+
+ /*
+ * Determine which newline sequence to split lines on by counting "\n" and "\r"
+ * and picking whichever is more common. If the two counts are equal, assume each
+ * "\r" is paired with a "\n", i.e. the file uses Windows ("\r\n") newlines.
+ */
+ $newlines_unix = substr_count($csv, "\n" );
+ $newlines_mac = substr_count($csv, "\r" );
+ if ( $newlines_unix > $newlines_mac ) {
+ $newline = "\n";
+ }
+ elseif ($newlines_unix == $newlines_mac) {
+ $newline = "\r\n";
+ }
+ else {
+ $newline = "\r";
+ }
+
+ $lines = explode( $newline, $csv );
+
$headers = str_getcsv( array_shift( $lines ) );
$data = array();
foreach ( $lines as $line ) {

0 comments on commit 4c6a95c

Please sign in to comment.