From 5d681a72ac90ea9a3c45c71193f11ba60ce94069 Mon Sep 17 00:00:00 2001
From: Lucien Greathouse <me@lpghatguy.com>
Date: Thu, 4 Apr 2019 18:21:55 -0700
Subject: [PATCH] Rewrite CSV conversion to dodge Serde (#152)

* Rewrite CSV conversion to dodge Serde

* Update CHANGELOG
---
 CHANGELOG.md                                  |   2 +
 server/src/rbx_snapshot.rs                    | 155 ++++++++++--------
 .../localization/expected-snapshot.json       |  16 ++
 .../localization/src/integers-bug-145.csv     |   2 +
 4 files changed, 107 insertions(+), 68 deletions(-)
 create mode 100644 test-projects/localization/src/integers-bug-145.csv

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9ee305e..2c659d0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 * Changed `rojo build` to use buffered I/O, which can make it up to 2x faster in some cases.
     * Building [*Road Not Taken*](https://github.com/LPGhatguy/roads) to an `rbxlx` file dropped from 150ms to 70ms on my machine
 * Fixed `LocalizationTable` instances being made from `csv` files not supporting empty rows or columns. ([#149](https://github.com/LPGhatguy/rojo/pull/149))
+* Fixed CSV files with entries that parse as numbers causing Rojo to panic ([#152](https://github.com/LPGhatguy/rojo/pull/152))
+* Improved error messages when malformed CSV files are found in a Rojo project
 
 ## [0.5.0 Alpha 8](https://github.com/LPGhatguy/rojo/releases/tag/v0.5.0-alpha.8) (March 29, 2019)
 * Added support for a bunch of new types when dealing with XML model/place files:
diff --git a/server/src/rbx_snapshot.rs b/server/src/rbx_snapshot.rs
index 902b5402..d5fb72ad 100644
--- a/server/src/rbx_snapshot.rs
+++ b/server/src/rbx_snapshot.rs
@@ -106,6 +106,7 @@ pub enum SnapshotError {
     },
 
     XmlModelDecodeError {
+        #[fail(cause)]
         inner: rbx_xml::DecodeError,
         path: PathBuf,
     },
@@ -115,6 +116,12 @@ pub enum SnapshotError {
         path: PathBuf,
     },
 
+    CsvDecodeError {
+        #[fail(cause)]
+        inner: csv::Error,
+        path: PathBuf,
+    },
+
     ProjectNodeUnusable,
 
     ProjectNodeInvalidTransmute {
@@ -151,6 +158,9 @@ impl fmt::Display for SnapshotError {
             SnapshotError::BinaryModelDecodeError { inner, path } => {
                 write!(output, "Malformed rbxm model: {:?} in path {}", inner, path.display())
             },
+            SnapshotError::CsvDecodeError { inner, path } => {
+                write!(output, "Malformed csv file: {} in path {}", inner, path.display())
+            },
             SnapshotError::ProjectNodeUnusable => {
                 write!(output, "Rojo project nodes must specify either $path or $className.")
             },
@@ -475,17 +485,87 @@ fn snapshot_txt_file<'source>(
 fn snapshot_csv_file<'source>(
     file: &'source ImfsFile,
 ) -> SnapshotResult<'source> {
+    /// Struct that holds any valid row from a Roblox CSV translation table.
+    ///
+    /// We manually deserialize into this table from CSV, but let JSON handle
+    /// serializing.
+    #[derive(Debug, Default, Serialize)]
+    #[serde(rename_all = "camelCase")]
+    struct LocalizationEntry<'a> {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        key: Option<&'a str>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        context: Option<&'a str>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        example: Option<&'a str>,
+
+        #[serde(skip_serializing_if = "Option::is_none")]
+        source: Option<&'a str>,
+
+        values: HashMap<&'a str, &'a str>,
+    }
+
     let instance_name = file.path
         .file_stem().expect("Could not extract file stem")
         .to_str().expect("Could not convert path to UTF-8");
 
-    let entries: Vec<LocalizationEntryJson> = csv::Reader::from_reader(file.contents.as_slice())
-        .deserialize()
-        // TODO: Propagate error upward instead of panicking
-        .map(|result| result.expect("Malformed localization table found!"))
-        .filter(|entry: &LocalizationEntryCsv| !(entry.key.is_empty() && entry.source.is_empty()))
-        .map(LocalizationEntryCsv::to_json)
-        .collect();
+    // Normally, we'd be able to let the csv crate construct our struct for us.
+    //
+    // However, because of a limitation with Serde's 'flatten' feature, it's not
+    // possible presently to losslessly collect extra string values while using
+    // csv+Serde.
+    //
+    // https://github.com/BurntSushi/rust-csv/issues/151
+    let mut reader = csv::Reader::from_reader(file.contents.as_slice());
+
+    let headers = reader.headers()
+        .map_err(|inner| SnapshotError::CsvDecodeError {
+            inner,
+            path: file.path.to_path_buf(),
+        })?
+        .clone();
+
+    let mut records = Vec::new();
+
+    for record in reader.into_records() {
+        let record = record
+            .map_err(|inner| SnapshotError::CsvDecodeError {
+                inner,
+                path: file.path.to_path_buf(),
+            })?;
+
+        records.push(record);
+    }
+
+    let mut entries = Vec::new();
+
+    for record in &records {
+        let mut entry = LocalizationEntry::default();
+
+        for (header, value) in headers.iter().zip(record.into_iter()) {
+            if header.is_empty() || value.is_empty() {
+                continue;
+            }
+
+            match header {
+                "Key" => entry.key = Some(value),
+                "Source" => entry.source = Some(value),
+                "Context" => entry.context = Some(value),
+                "Example" => entry.example = Some(value),
+                _ => {
+                    entry.values.insert(header, value);
+                }
+            }
+        }
+
+        if entry.key.is_none() && entry.source.is_none() {
+            continue;
+        }
+
+        entries.push(entry);
+    }
 
     let table_contents = serde_json::to_string(&entries)
         .expect("Could not encode JSON for localization table");
@@ -507,67 +587,6 @@ fn snapshot_csv_file<'source>(
     }))
 }
 
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "PascalCase")]
-struct LocalizationEntryCsv {
-    key: String,
-    context: String,
-    example: String,
-    source: String,
-    #[serde(flatten)]
-    values: HashMap<String, String>,
-}
-
-impl LocalizationEntryCsv {
-    fn to_json(self) -> LocalizationEntryJson {
-        fn none_if_empty(value: String) -> Option<String> {
-            if value.is_empty() {
-                None
-            } else {
-                Some(value)
-            }
-        }
-
-        let key = none_if_empty(self.key);
-        let context = none_if_empty(self.context);
-        let example = none_if_empty(self.example);
-        let source = none_if_empty(self.source);
-
-        let mut values = HashMap::with_capacity(self.values.len());
-        for (key, value) in self.values.into_iter() {
-            if !key.is_empty() {
-                values.insert(key, value);
-            }
-        }
-
-        LocalizationEntryJson {
-            key,
-            context,
-            example,
-            source,
-            values,
-        }
-    }
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "camelCase")]
-struct LocalizationEntryJson {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    key: Option<String>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    context: Option<String>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    example: Option<String>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    source: Option<String>,
-
-    values: HashMap<String, String>,
-}
-
 fn snapshot_json_model_file<'source>(
     file: &'source ImfsFile,
 ) -> SnapshotResult<'source> {
diff --git a/test-projects/localization/expected-snapshot.json b/test-projects/localization/expected-snapshot.json
index 1b706d65..dc093685 100644
--- a/test-projects/localization/expected-snapshot.json
+++ b/test-projects/localization/expected-snapshot.json
@@ -19,6 +19,22 @@
         "project_definition": null
       }
     },
+    {
+      "name": "integers-bug-145",
+      "class_name": "LocalizationTable",
+      "properties": {
+        "Contents": {
+          "Type": "String",
+          "Value": "[{\"key\":\"Count\",\"example\":\"A number demonstrating issue 145\",\"source\":\"3\",\"values\":{\"es\":\"7\"}}]"
+        }
+      },
+      "children": [],
+      "metadata": {
+        "ignore_unknown_instances": false,
+        "source_path": "src/integers-bug-145.csv",
+        "project_definition": null
+      }
+    },
     {
       "name": "normal",
       "class_name": "LocalizationTable",
diff --git a/test-projects/localization/src/integers-bug-145.csv b/test-projects/localization/src/integers-bug-145.csv
new file mode 100644
index 00000000..a865f3fd
--- /dev/null
+++ b/test-projects/localization/src/integers-bug-145.csv
@@ -0,0 +1,2 @@
+Key,Source,Context,Example,es
+Count,3,,A number demonstrating issue 145,7
\ No newline at end of file