Commit

src/ NFC comment Issue #16
Issue #16
jtmoon79 committed Aug 8, 2022
1 parent 073b6ce commit c35066c
Showing 6 changed files with 35 additions and 37 deletions.
8 changes: 4 additions & 4 deletions src/data/line.rs
@@ -126,7 +126,7 @@ impl fmt::Debug for LinePart {
}

impl LinePart {
// XXX: does not handle multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const _CHARSZ: usize = 1;

/// create a new `LinePart`. Remember that `blocki_end` points to one byte past
@@ -223,7 +223,7 @@ impl LinePart {
#[cfg(any(debug_assertions,test))]
pub(self) fn impl_to_String_raw(self: &LinePart, raw: bool) -> String {
// XXX: intermixing byte lengths and character lengths
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let s1: String;
let slice_ = self.as_slice();
if raw {
@@ -655,7 +655,7 @@ impl Line {
///
/// `raw` false will write transcode each byte to a character and use pictoral representations
///
/// XXX: `raw==false` does not handle multi-byte encodings
// XXX: Issue #16 `raw==false` only handles UTF-8/ASCII encoding
#[cfg(any(debug_assertions,test))]
pub fn print(self: &Line, raw: bool) {
// is this an expensive command? should `stdout` be cached?
@@ -674,7 +674,7 @@ impl Line {
}
}
} else {
// XXX: only handle single-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
// XXX: this is not efficient
let s = match std::str::from_utf8(slice) {
Ok(val) => val,
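For context on the line.rs hunks: the `print` and `impl_to_String_raw` comments describe transcoding each byte to a pictorial character for debug output. A minimal sketch of that idea follows, assuming only the Rust standard library; `byte_to_char_picture` is a hypothetical name (the repository's helper is `byte_to_char_noraw`), and the mapping is only faithful for single-byte ASCII data, which is exactly the limitation the Issue #16 comments call out.

// Hypothetical sketch, not the repository's implementation: map a raw byte to
// a printable character for debug output. Control bytes become Unicode
// "control picture" glyphs; everything else is cast straight to char, which is
// only correct for single-byte (ASCII) input.
fn byte_to_char_picture(b: u8) -> char {
    match b {
        0x00..=0x1F => char::from_u32(0x2400 + b as u32).unwrap(), // e.g. b'\n' -> '␊'
        0x7F => '␡', // DEL
        _ => b as char,
    }
}

fn main() {
    assert_eq!(byte_to_char_picture(b'\n'), '␊');
    assert_eq!(byte_to_char_picture(b'A'), 'A');
}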
5 changes: 3 additions & 2 deletions src/data/sysline.rs
@@ -127,7 +127,7 @@ impl Sysline {
/// default `with_capacity` for a `Lines`, most often will only need 1 capacity
/// as the found "sysline" will likely be one `Line`
const SYSLINE_PARTS_WITH_CAPACITY: usize = 1;
// XXX: does not handle multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const CHARSZ: usize = 1;

pub fn new() -> Sysline {
@@ -263,6 +263,7 @@ impl Sysline {

/// get the last byte of this Sysline
pub(crate) fn last_byte(self: &Sysline) -> Option<u8> {
// XXX: Issue #16 only handles UTF-8/ASCII encoding
assert_eq!(self.charsz(), 1, "charsz {} not implemented", self.charsz());
let len_ = self.lines.len();
if len_ <= 0 {
@@ -317,7 +318,7 @@ impl Sysline {
sz += (*lp).len();
}
// XXX: intermixing byte lengths and character lengths
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let mut s_ = String::with_capacity(sz + 1);
for lp in &self.lines {
s_ += (*lp).impl_to_String_raw(raw).as_str();
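For context on the `last_byte` hunk: a minimal sketch, assuming only the standard library, of why a last byte equals a last character only under a single-byte encoding such as ASCII; for multi-byte UTF-8 the last byte is merely the trailing byte of the final character's encoding, which is what the added Issue #16 comment flags.

// Minimal sketch (not the repository's Sysline::last_byte): return the final
// byte of a buffer. For ASCII this is the last character; for multi-byte UTF-8
// it is only the last byte of that character's encoding.
fn last_byte(data: &[u8]) -> Option<u8> {
    data.last().copied()
}

fn main() {
    assert_eq!(last_byte(b"line\n"), Some(b'\n')); // ASCII: last byte is '\n'
    assert_eq!(last_byte("é".as_bytes()), Some(0xA9)); // 'é' is 0xC3 0xA9 in UTF-8
}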
2 changes: 1 addition & 1 deletion src/printer_debug/printers.rs
@@ -443,7 +443,7 @@ pub fn pretty_print(buffer: &[u8], raw: bool) {
// is this an expensive command? should `stdout` be cached?
let stdout: std::io::Stdout = std::io::stdout();
let mut stdout_lock = stdout.lock();
// XXX: only handle single-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
// XXX: doing this char by char is probably not efficient
//let s = match str::from_utf8_lossy(buffer) {
let s = match core::str::from_utf8(buffer) {
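For context on the `pretty_print` hunk: the commented-out `from_utf8_lossy` line and the live `core::str::from_utf8` call suggest two ways to decode the buffer. A minimal sketch of that choice follows, assuming only the standard library; it is not the repository's `pretty_print`, which instead handles the buffer byte by byte and so is only correct for single-byte encodings (Issue #16).

// Minimal sketch: try strict UTF-8 decoding first, and fall back to a lossy
// conversion (invalid sequences become U+FFFD) when the bytes are not valid UTF-8.
fn to_printable(buffer: &[u8]) -> String {
    match core::str::from_utf8(buffer) {
        Ok(s) => s.to_string(),
        Err(_) => String::from_utf8_lossy(buffer).into_owned(),
    }
}

fn main() {
    println!("{}", to_printable(b"ok\xff")); // prints "ok\u{FFFD}"
}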
2 changes: 1 addition & 1 deletion src/readers/blockreader.rs
@@ -362,7 +362,7 @@ impl BlockReader {
}
let path_std: &Path = Path::new(&path);

// TODO: pass in `mimeguess`; avoid repeats of the tedious operation
// TODO: Issue #15 pass in `mimeguess`; avoid repeats of the tedious operation
let mimeguess_: MimeGuess = MimeGuess::from_path(path_std);

let mut open_options = FileOpenOptions::new();
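For context on the Issue #15 TODO: a hypothetical sketch of guessing the MIME type once at the call site and passing it into the reader, assuming the mime_guess crate already used by the repository; `BlockReaderSketch` and `new_with_mimeguess` are invented names for illustration and are not the repository's API.

// Hypothetical sketch of the Issue #15 idea: compute MimeGuess once and pass
// it in, rather than repeating MimeGuess::from_path in each reader constructor.
use std::path::Path;
use mime_guess::MimeGuess;

#[derive(Debug)]
struct BlockReaderSketch {
    mimeguess: MimeGuess,
}

impl BlockReaderSketch {
    fn new_with_mimeguess(_path: &Path, mimeguess: MimeGuess) -> Self {
        BlockReaderSketch { mimeguess }
    }
}

fn main() {
    let path = Path::new("/var/log/syslog");
    let mimeguess = MimeGuess::from_path(path); // guessed once, reused below
    let reader = BlockReaderSketch::new_with_mimeguess(path, mimeguess);
    println!("{:?}", reader);
}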
43 changes: 20 additions & 23 deletions src/readers/linereader.rs
@@ -106,7 +106,7 @@ pub struct LineReader {
/// Distinct from `self.lines.len()` as that may have contents removed when --streaming
pub (crate) lines_processed: Count,
/// smallest size character in bytes
/// TODO: handle char sizes > 1 byte, multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
charsz_: CharSz,
/// enable internal LRU cache for `find_line` (default `true`)
find_line_lru_cache_enabled: bool,
@@ -147,7 +147,7 @@ const CHARSZ_MIN: CharSz = 1;
/// maximum char storage size in bytes
const CHARSZ_MAX: CharSz = 4;
/// default char storage size in bytes
/// XXX: does not handle multi-byte encodings (e.g. UTF-8) or multi-byte character storage (e.g. UTF-32)
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const CHARSZ: CharSz = CHARSZ_MIN;

/// implement the LineReader things
@@ -160,7 +160,7 @@ impl LineReader {
pub fn new(path: FPath, filetype: FileType, blocksz: BlockSz) -> Result<LineReader> {
dpnxf!("LineReader::new({:?}, {:?}, {:?})", path, filetype, blocksz);
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
assert_ge!(
blocksz,
(CHARSZ_MIN as BlockSz),
@@ -473,7 +473,7 @@ impl LineReader {
match self.get_linep(&fileoffset) {
Some(linep) => {
dpo!("self.get_linep({}) returned @{:p}", fileoffset, linep);
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*linep).fileoffset_end() + charsz_fo;
if self.is_line_last(&linep) {
if self.find_line_lru_cache_enabled {
@@ -622,16 +622,15 @@ impl LineReader {
let bi_stop: BlockIndex = bptr_middle.len() as BlockIndex;
assert_ge!(bi_stop, charsz_bi, "bi_stop is less than charsz; not yet handled");

// XXX: multi-byte
//bi_beg = bi_stop - charsz_bi;
// XXX: only handle UTF-8/ASCII encoding
dpof!("({}) B1: scan middle block {} forwards, starting from blockindex {} (fileoffset {}) searching for newline B",
fileoffset,
bo_middle,
bi_at,
self.file_offset_at_block_offset_index(bo_middle, bi_at)
);
loop {
// XXX: single-byte encoding
// XXX: only handle UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -867,7 +866,7 @@ impl LineReader {
BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -880,15 +879,15 @@
byte_to_char_noraw((*bptr_middle)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
break;
}
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
Expand Down Expand Up @@ -982,8 +981,6 @@ impl LineReader {
/// fine_line(2) -> 2,2 "y"
/// ```
///
/// XXX: presumes a single-byte can represent a '\n'; i.e. does not handle UTF-16 or UTF-32 or other.
///
/// XXX: returning the "next fileoffset (along with `LineP`) is jenky. Just return the `LineP`.
/// and/or add `iter` capabilities to `Line` that will hide tracking the "next fileoffset".
///
@@ -992,7 +989,7 @@
/// Changes require extensive retesting.
/// You've been warned.
///
// TODO: [2021/08/30] handle different encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
pub fn find_line(&mut self, fileoffset: FileOffset) -> ResultS4LineFind {
dpnf!("(LineReader@{:p}, {})", self, fileoffset);

@@ -1091,11 +1088,11 @@ impl LineReader {
let mut bi_at: BlockIndex = bi_middle;
let bi_stop: BlockIndex = bptr_middle.len() as BlockIndex;
assert_ge!(bi_stop, charsz_bi, "bi_stop is less than charsz; not yet handled");
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
//bi_beg = bi_stop - charsz_bi;
dpof!("B1: scan middle block {} forwards (block len {}), starting from blockindex {} (fileoffset {}) searching for newline B", bo_middle, (*bptr_middle).len(), bi_at, self.file_offset_at_block_offset_index(bo_middle, bi_at));
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -1175,7 +1172,7 @@ impl LineReader {
bi_end = (*bptr).len() as BlockIndex;
assert_ge!(bi_end, charsz_bi, "blockindex bi_end {} is less than charsz; not yet handled, file {:?}", bi_end, self.path());
assert_ne!(bi_end, 0, "blockindex bi_end is zero; Block at blockoffset {}, BlockP @0x{:p}, has len() zero", bof, bptr);
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
//bi_beg = bi_end - charsz_bi;
dpof!(
"B2: scan block {} forwards, starting from blockindex {} (fileoffset {}) up to blockindex {} searching for newline B",
Expand All @@ -1185,7 +1182,7 @@ impl LineReader {
bi_end,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr)[bi_beg] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bof, bi_beg);
@@ -1413,7 +1410,7 @@ impl LineReader {
bo_middle, bi_at, self.file_offset_at_block_offset_index(bo_middle, bi_at), BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bo_middle, bi_at);
Expand All @@ -1425,15 +1422,15 @@ impl LineReader {
byte_to_char_noraw((*bptr_middle)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
break;
}
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
@@ -1523,7 +1520,7 @@ impl LineReader {
bof, bi_at, self.file_offset_at_block_offset_index(bof, bi_at), BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bof, bi_at);
Expand All @@ -1535,7 +1532,7 @@ impl LineReader {
byte_to_char_noraw((*bptr)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
let bof_a1 = self.block_offset_at_file_offset(fo_nl_a1);
@@ -1573,7 +1570,7 @@ impl LineReader {
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
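For context on the many Issue #16 markers in linereader.rs: the loops above scan a block of bytes for `NLu8` (the newline byte). A minimal sketch of that scan follows, assuming only the standard library; a byte-wise search for 0x0A is valid for UTF-8/ASCII because 0x0A never occurs inside a multi-byte UTF-8 sequence, but it breaks for UTF-16/UTF-32, where a newline occupies two or four bytes and byte offsets no longer equal character offsets.

// Minimal sketch (not the repository's find_line): locate the next b'\n' in a
// block of bytes, returning its byte index. Valid for UTF-8/ASCII only.
fn find_newline(block: &[u8], start: usize) -> Option<usize> {
    const NLU8: u8 = b'\n';
    block[start..].iter().position(|&b| b == NLU8).map(|i| start + i)
}

fn main() {
    let block = "αβ\nγ".as_bytes(); // 'α' and 'β' are two bytes each in UTF-8
    assert_eq!(find_newline(block, 0), Some(4)); // a byte offset, not a character offset
}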
12 changes: 6 additions & 6 deletions src/readers/syslinereader.rs
@@ -623,7 +623,7 @@ impl SyslineReader {
dpnf!("syslines.insert({}, Sysline @[{}, {}] datetime: {:?})", fo_beg, (*syslinep).fileoffset_begin(), (*syslinep).fileoffset_end(), (*syslinep).dt());
self.syslines.insert(fo_beg, syslinep.clone());
self.syslines_count += 1;
// XXX: multi-byte character
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_end1: FileOffset = fo_end + (self.charsz() as FileOffset);
dpxf!("syslines_by_range.insert(({}‥{}], {})", fo_beg, fo_end1, fo_beg);
self.syslines_by_range.insert(fo_beg..fo_end1, fo_beg);
@@ -734,7 +734,7 @@ impl SyslineReader {
dpof!("line too short {} for requested start {}; continue", line.len(), dtpd.range_regex.start);
continue;
}
// XXX: does not support multi-byte string; assumes single-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let slice_end: usize;
if line.len() > dtpd.range_regex.end {
slice_end = dtpd.range_regex.end;
@@ -1081,7 +1081,7 @@ impl SyslineReader {
self.syslines_by_range_hit += 1;
let fo: &FileOffset = range_fo.1;
let syslinep: SyslineP = self.syslines[fo].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*syslinep).fileoffset_next() + (self.charsz() as FileOffset);
if self.is_sysline_last(&syslinep) {
dpxf!(
@@ -1123,7 +1123,7 @@ impl SyslineReader {
self.syslines_hit += 1;
dpof!("hit self.syslines for FileOffset {}", fileoffset);
let syslinep: SyslineP = self.syslines[&fileoffset].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*syslinep).fileoffset_end() + (self.charsz() as FileOffset);
if self.is_sysline_last(&syslinep) {
dpof!(
@@ -1502,7 +1502,7 @@ impl SyslineReader {
debug_assert!(self.syslines_by_range.contains_key(&fo1), "self.syslines.contains_key({}) however, self.syslines_by_range.contains_key({}); syslines_by_range out of synch", fo1, fo1);
dpo!("find_sysline: hit self.syslines for FileOffset {}", fo1);
let syslinep = self.syslines[&fo1].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next = (*syslinep).fileoffset_end() + (self.charsz() as FileOffset);
// TODO: determine if `fileoffset` is the last sysline of the file
// should add a private helper function for this task `is_sysline_last(FileOffset)` ... something like that
@@ -1535,7 +1535,7 @@ impl SyslineReader {
self.syslines_by_range_hit += 1;
let fo = range_fo.1;
let syslinep = self.syslines[fo].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next = (*syslinep).fileoffset_next() + (self.charsz() as FileOffset);
if self.find_sysline_lru_cache_enabled {
self.find_sysline_lru_cache_put += 1;
