Commit

src/ NFC comment Issue #16
Issue #16
jtmoon79 committed Aug 8, 2022
1 parent 073b6ce commit c35066c
Showing 6 changed files with 35 additions and 37 deletions.
8 changes: 4 additions & 4 deletions src/data/line.rs
@@ -126,7 +126,7 @@ impl fmt::Debug for LinePart {
}

impl LinePart {
// XXX: does not handle multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const _CHARSZ: usize = 1;

/// create a new `LinePart`. Remember that `blocki_end` points to one byte past
@@ -223,7 +223,7 @@ impl LinePart {
#[cfg(any(debug_assertions,test))]
pub(self) fn impl_to_String_raw(self: &LinePart, raw: bool) -> String {
// XXX: intermixing byte lengths and character lengths
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let s1: String;
let slice_ = self.as_slice();
if raw {
@@ -655,7 +655,7 @@ impl Line {
///
/// `raw` false will write transcode each byte to a character and use pictoral representations
///
/// XXX: `raw==false` does not handle multi-byte encodings
// XXX: Issue #16 `raw==false` only handles UTF-8/ASCII encoding
#[cfg(any(debug_assertions,test))]
pub fn print(self: &Line, raw: bool) {
// is this an expensive command? should `stdout` be cached?
@@ -674,7 +674,7 @@ impl Line {
}
}
} else {
// XXX: only handle single-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
// XXX: this is not efficient
let s = match std::str::from_utf8(slice) {
Ok(val) => val,
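For context on the line.rs hunks: the `print` and `impl_to_String_raw` comments describe transcoding each byte to a pictorial character for debug output. A minimal sketch of that idea follows, assuming only the Rust standard library; `byte_to_char_picture` is a hypothetical name (the repository's helper is `byte_to_char_noraw`), and the mapping is only faithful for single-byte ASCII data, which is exactly the limitation the Issue #16 comments call out.

// Hypothetical sketch, not the repository's implementation: map a raw byte to
// a printable character for debug output. Control bytes become Unicode
// "control picture" glyphs; everything else is cast straight to char, which is
// only correct for single-byte (ASCII) input.
fn byte_to_char_picture(b: u8) -> char {
    match b {
        0x00..=0x1F => char::from_u32(0x2400 + b as u32).unwrap(), // e.g. b'\n' -> '␊'
        0x7F => '␡', // DEL
        _ => b as char,
    }
}

fn main() {
    assert_eq!(byte_to_char_picture(b'\n'), '␊');
    assert_eq!(byte_to_char_picture(b'A'), 'A');
}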
5 changes: 3 additions & 2 deletions src/data/sysline.rs
@@ -127,7 +127,7 @@ impl Sysline {
/// default `with_capacity` for a `Lines`, most often will only need 1 capacity
/// as the found "sysline" will likely be one `Line`
const SYSLINE_PARTS_WITH_CAPACITY: usize = 1;
// XXX: does not handle multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const CHARSZ: usize = 1;

pub fn new() -> Sysline {
@@ -263,6 +263,7 @@ impl Sysline {

/// get the last byte of this Sysline
pub(crate) fn last_byte(self: &Sysline) -> Option<u8> {
// XXX: Issue #16 only handles UTF-8/ASCII encoding
assert_eq!(self.charsz(), 1, "charsz {} not implemented", self.charsz());
let len_ = self.lines.len();
if len_ <= 0 {
@@ -317,7 +318,7 @@ impl Sysline {
sz += (*lp).len();
}
// XXX: intermixing byte lengths and character lengths
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let mut s_ = String::with_capacity(sz + 1);
for lp in &self.lines {
s_ += (*lp).impl_to_String_raw(raw).as_str();
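For context on the `last_byte` hunk: a minimal sketch, assuming only the standard library, of why a last byte equals a last character only under a single-byte encoding such as ASCII; for multi-byte UTF-8 the last byte is merely the trailing byte of the final character's encoding, which is what the added Issue #16 comment flags.

// Minimal sketch (not the repository's Sysline::last_byte): return the final
// byte of a buffer. For ASCII this is the last character; for multi-byte UTF-8
// it is only the last byte of that character's encoding.
fn last_byte(data: &[u8]) -> Option<u8> {
    data.last().copied()
}

fn main() {
    assert_eq!(last_byte(b"line\n"), Some(b'\n')); // ASCII: last byte is '\n'
    assert_eq!(last_byte("é".as_bytes()), Some(0xA9)); // 'é' is 0xC3 0xA9 in UTF-8
}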
2 changes: 1 addition & 1 deletion src/printer_debug/printers.rs
@@ -443,7 +443,7 @@ pub fn pretty_print(buffer: &[u8], raw: bool) {
// is this an expensive command? should `stdout` be cached?
let stdout: std::io::Stdout = std::io::stdout();
let mut stdout_lock = stdout.lock();
// XXX: only handle single-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
// XXX: doing this char by char is probably not efficient
//let s = match str::from_utf8_lossy(buffer) {
let s = match core::str::from_utf8(buffer) {
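For context on the `pretty_print` hunk: the commented-out `from_utf8_lossy` line and the live `core::str::from_utf8` call suggest two ways to decode the buffer. A minimal sketch of that choice follows, assuming only the standard library; it is not the repository's `pretty_print`, which instead handles the buffer byte by byte and so is only correct for single-byte encodings (Issue #16).

// Minimal sketch: try strict UTF-8 decoding first, and fall back to a lossy
// conversion (invalid sequences become U+FFFD) when the bytes are not valid UTF-8.
fn to_printable(buffer: &[u8]) -> String {
    match core::str::from_utf8(buffer) {
        Ok(s) => s.to_string(),
        Err(_) => String::from_utf8_lossy(buffer).into_owned(),
    }
}

fn main() {
    println!("{}", to_printable(b"ok\xff")); // prints "ok\u{FFFD}"
}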
2 changes: 1 addition & 1 deletion src/readers/blockreader.rs
@@ -362,7 +362,7 @@ impl BlockReader {
}
let path_std: &Path = Path::new(&path);

// TODO: pass in `mimeguess`; avoid repeats of the tedious operation
// TODO: Issue #15 pass in `mimeguess`; avoid repeats of the tedious operation
let mimeguess_: MimeGuess = MimeGuess::from_path(path_std);

let mut open_options = FileOpenOptions::new();
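For context on the Issue #15 TODO: a hypothetical sketch of guessing the MIME type once at the call site and passing it into the reader, assuming the mime_guess crate already used by the repository; `BlockReaderSketch` and `new_with_mimeguess` are invented names for illustration and are not the repository's API.

// Hypothetical sketch of the Issue #15 idea: compute MimeGuess once and pass
// it in, rather than repeating MimeGuess::from_path in each reader constructor.
use std::path::Path;
use mime_guess::MimeGuess;

#[derive(Debug)]
struct BlockReaderSketch {
    mimeguess: MimeGuess,
}

impl BlockReaderSketch {
    fn new_with_mimeguess(_path: &Path, mimeguess: MimeGuess) -> Self {
        BlockReaderSketch { mimeguess }
    }
}

fn main() {
    let path = Path::new("/var/log/syslog");
    let mimeguess = MimeGuess::from_path(path); // guessed once, reused below
    let reader = BlockReaderSketch::new_with_mimeguess(path, mimeguess);
    println!("{:?}", reader);
}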
43 changes: 20 additions & 23 deletions src/readers/linereader.rs
@@ -106,7 +106,7 @@ pub struct LineReader {
/// Distinct from `self.lines.len()` as that may have contents removed when --streaming
pub (crate) lines_processed: Count,
/// smallest size character in bytes
/// TODO: handle char sizes > 1 byte, multi-byte encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
charsz_: CharSz,
/// enable internal LRU cache for `find_line` (default `true`)
find_line_lru_cache_enabled: bool,
@@ -147,7 +147,7 @@ const CHARSZ_MIN: CharSz = 1;
/// maximum char storage size in bytes
const CHARSZ_MAX: CharSz = 4;
/// default char storage size in bytes
/// XXX: does not handle multi-byte encodings (e.g. UTF-8) or multi-byte character storage (e.g. UTF-32)
// XXX: Issue #16 only handles UTF-8/ASCII encoding
const CHARSZ: CharSz = CHARSZ_MIN;

/// implement the LineReader things
@@ -160,7 +160,7 @@ impl LineReader {
pub fn new(path: FPath, filetype: FileType, blocksz: BlockSz) -> Result<LineReader> {
dpnxf!("LineReader::new({:?}, {:?}, {:?})", path, filetype, blocksz);
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
assert_ge!(
blocksz,
(CHARSZ_MIN as BlockSz),
@@ -473,7 +473,7 @@ impl LineReader {
match self.get_linep(&fileoffset) {
Some(linep) => {
dpo!("self.get_linep({}) returned @{:p}", fileoffset, linep);
// XXX: does not handle multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*linep).fileoffset_end() + charsz_fo;
if self.is_line_last(&linep) {
if self.find_line_lru_cache_enabled {
@@ -622,16 +622,15 @@ impl LineReader {
let bi_stop: BlockIndex = bptr_middle.len() as BlockIndex;
assert_ge!(bi_stop, charsz_bi, "bi_stop is less than charsz; not yet handled");

// XXX: multi-byte
//bi_beg = bi_stop - charsz_bi;
// XXX: only handle UTF-8/ASCII encoding
dpof!("({}) B1: scan middle block {} forwards, starting from blockindex {} (fileoffset {}) searching for newline B",
fileoffset,
bo_middle,
bi_at,
self.file_offset_at_block_offset_index(bo_middle, bi_at)
);
loop {
// XXX: single-byte encoding
// XXX: only handle UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -867,7 +866,7 @@ impl LineReader {
BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -880,15 +879,15 @@
byte_to_char_noraw((*bptr_middle)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
break;
}
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
Expand Down Expand Up @@ -982,8 +981,6 @@ impl LineReader {
/// fine_line(2) -> 2,2 "y"
/// ```
///
/// XXX: presumes a single-byte can represent a '\n'; i.e. does not handle UTF-16 or UTF-32 or other.
///
/// XXX: returning the "next fileoffset (along with `LineP`) is jenky. Just return the `LineP`.
/// and/or add `iter` capabilities to `Line` that will hide tracking the "next fileoffset".
///
@@ -992,7 +989,7 @@
/// Changes require extensive retesting.
/// You've been warned.
///
// TODO: [2021/08/30] handle different encodings
// XXX: Issue #16 only handles UTF-8/ASCII encoding
pub fn find_line(&mut self, fileoffset: FileOffset) -> ResultS4LineFind {
dpnf!("(LineReader@{:p}, {})", self, fileoffset);

@@ -1091,11 +1088,11 @@ impl LineReader {
let mut bi_at: BlockIndex = bi_middle;
let bi_stop: BlockIndex = bptr_middle.len() as BlockIndex;
assert_ge!(bi_stop, charsz_bi, "bi_stop is less than charsz; not yet handled");
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
//bi_beg = bi_stop - charsz_bi;
dpof!("B1: scan middle block {} forwards (block len {}), starting from blockindex {} (fileoffset {}) searching for newline B", bo_middle, (*bptr_middle).len(), bi_at, self.file_offset_at_block_offset_index(bo_middle, bi_at));
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bo_middle, bi_at);
@@ -1175,7 +1172,7 @@ impl LineReader {
bi_end = (*bptr).len() as BlockIndex;
assert_ge!(bi_end, charsz_bi, "blockindex bi_end {} is less than charsz; not yet handled, file {:?}", bi_end, self.path());
assert_ne!(bi_end, 0, "blockindex bi_end is zero; Block at blockoffset {}, BlockP @0x{:p}, has len() zero", bof, bptr);
// XXX: multi-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
//bi_beg = bi_end - charsz_bi;
dpof!(
"B2: scan block {} forwards, starting from blockindex {} (fileoffset {}) up to blockindex {} searching for newline B",
Expand All @@ -1185,7 +1182,7 @@ impl LineReader {
bi_end,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr)[bi_beg] == NLu8 {
found_nl_b = true;
fo_nl_b = self.file_offset_at_block_offset_index(bof, bi_beg);
@@ -1413,7 +1410,7 @@ impl LineReader {
bo_middle, bi_at, self.file_offset_at_block_offset_index(bo_middle, bi_at), BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr_middle)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bo_middle, bi_at);
Expand All @@ -1425,15 +1422,15 @@ impl LineReader {
byte_to_char_noraw((*bptr_middle)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
break;
}
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
@@ -1523,7 +1520,7 @@ impl LineReader {
bof, bi_at, self.file_offset_at_block_offset_index(bof, bi_at), BI_STOP,
);
loop {
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
if (*bptr)[bi_at] == NLu8 {
found_nl_a = true;
fo_nl_a = self.file_offset_at_block_offset_index(bof, bi_at);
Expand All @@ -1535,7 +1532,7 @@ impl LineReader {
byte_to_char_noraw((*bptr)[bi_at]),
);
// adjust offsets one forward
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
fo_nl_a1 = fo_nl_a + charsz_fo;
bi_at += charsz_bi;
let bof_a1 = self.block_offset_at_file_offset(fo_nl_a1);
@@ -1573,7 +1570,7 @@ impl LineReader {
if bi_at == 0 {
break;
}
// XXX: single-byte encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
bi_at -= charsz_bi;
if bi_at < BI_STOP {
break;
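For context on the many Issue #16 markers in linereader.rs: the loops above scan a block of bytes for `NLu8` (the newline byte). A minimal sketch of that scan follows, assuming only the standard library; a byte-wise search for 0x0A is valid for UTF-8/ASCII because 0x0A never occurs inside a multi-byte UTF-8 sequence, but it breaks for UTF-16/UTF-32, where a newline occupies two or four bytes and byte offsets no longer equal character offsets.

// Minimal sketch (not the repository's find_line): locate the next b'\n' in a
// block of bytes, returning its byte index. Valid for UTF-8/ASCII only.
fn find_newline(block: &[u8], start: usize) -> Option<usize> {
    const NLU8: u8 = b'\n';
    block[start..].iter().position(|&b| b == NLU8).map(|i| start + i)
}

fn main() {
    let block = "αβ\nγ".as_bytes(); // 'α' and 'β' are two bytes each in UTF-8
    assert_eq!(find_newline(block, 0), Some(4)); // a byte offset, not a character offset
}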
12 changes: 6 additions & 6 deletions src/readers/syslinereader.rs
@@ -623,7 +623,7 @@ impl SyslineReader {
dpnf!("syslines.insert({}, Sysline @[{}, {}] datetime: {:?})", fo_beg, (*syslinep).fileoffset_begin(), (*syslinep).fileoffset_end(), (*syslinep).dt());
self.syslines.insert(fo_beg, syslinep.clone());
self.syslines_count += 1;
// XXX: multi-byte character
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_end1: FileOffset = fo_end + (self.charsz() as FileOffset);
dpxf!("syslines_by_range.insert(({}‥{}], {})", fo_beg, fo_end1, fo_beg);
self.syslines_by_range.insert(fo_beg..fo_end1, fo_beg);
@@ -734,7 +734,7 @@ impl SyslineReader {
dpof!("line too short {} for requested start {}; continue", line.len(), dtpd.range_regex.start);
continue;
}
// XXX: does not support multi-byte string; assumes single-byte
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let slice_end: usize;
if line.len() > dtpd.range_regex.end {
slice_end = dtpd.range_regex.end;
@@ -1081,7 +1081,7 @@ impl SyslineReader {
self.syslines_by_range_hit += 1;
let fo: &FileOffset = range_fo.1;
let syslinep: SyslineP = self.syslines[fo].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*syslinep).fileoffset_next() + (self.charsz() as FileOffset);
if self.is_sysline_last(&syslinep) {
dpxf!(
@@ -1123,7 +1123,7 @@ impl SyslineReader {
self.syslines_hit += 1;
dpof!("hit self.syslines for FileOffset {}", fileoffset);
let syslinep: SyslineP = self.syslines[&fileoffset].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next: FileOffset = (*syslinep).fileoffset_end() + (self.charsz() as FileOffset);
if self.is_sysline_last(&syslinep) {
dpof!(
@@ -1502,7 +1502,7 @@ impl SyslineReader {
debug_assert!(self.syslines_by_range.contains_key(&fo1), "self.syslines.contains_key({}) however, self.syslines_by_range.contains_key({}); syslines_by_range out of synch", fo1, fo1);
dpo!("find_sysline: hit self.syslines for FileOffset {}", fo1);
let syslinep = self.syslines[&fo1].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next = (*syslinep).fileoffset_end() + (self.charsz() as FileOffset);
// TODO: determine if `fileoffset` is the last sysline of the file
// should add a private helper function for this task `is_sysline_last(FileOffset)` ... something like that
@@ -1535,7 +1535,7 @@ impl SyslineReader {
self.syslines_by_range_hit += 1;
let fo = range_fo.1;
let syslinep = self.syslines[fo].clone();
// XXX: multi-byte character encoding
// XXX: Issue #16 only handles UTF-8/ASCII encoding
let fo_next = (*syslinep).fileoffset_next() + (self.charsz() as FileOffset);
if self.find_sysline_lru_cache_enabled {
self.find_sysline_lru_cache_put += 1;
