Skip to content

Commit

Permalink
Move all test functions into tests module
Browse files Browse the repository at this point in the history
  • Loading branch information
yestyle committed Mar 4, 2023
1 parent 58560da commit 3ed2015
Showing 1 changed file with 83 additions and 85 deletions.
168 changes: 83 additions & 85 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@ use argh::FromArgs;
use byteorder::{BigEndian, ReadBytesExt};
use bzip2::bufread::BzDecoder;
use flate2::bufread::GzDecoder;
#[cfg(test)]
use grep_matcher::Matcher;
#[cfg(test)]
use grep_regex::RegexMatcherBuilder;
#[cfg(test)]
use grep_searcher::{Searcher, Sink, SinkMatch};
use lz4_flex::frame::FrameDecoder as Lz4Decoder;
use lzma::LzmaReader;
use regex::bytes::RegexBuilder;
Expand All @@ -32,85 +26,6 @@ const MAGIC_NUMBER_LZO: &str = r"\x89\x4c\x5a";
const MAGIC_NUMBER_LZ4: &str = r"\x02\x21\x4c\x18";
const MAGIC_NUMBER_ZSTD: &str = r"\x28\xb5\x2f\xfd";

#[cfg(test)]
fn search_bytes(file: &mut File, pattern: &[u8]) -> Result<u64, io::Error> {
let filelen = file.metadata()?.len();
let mut start = 0;
let mut offset: u64 = 0;

file.seek(SeekFrom::Start(0))?;
for b in file.bytes() {
if let Ok(b) = b {
if b == pattern[start] {
start += 1;
if start == pattern.len() {
// let offset point to the start of the pattern
offset -= start as u64 - 1;
break;
}
} else {
start = 0;
}
}
offset += 1;
}

if offset < filelen - pattern.len() as u64 {
Ok(offset)
} else {
Err(io::Error::from(ErrorKind::NotFound))
}
}

#[cfg(test)]
struct Offset<F>(F)
where
F: FnMut(u64, &[u8]) -> Result<bool, io::Error>;

#[cfg(test)]
impl<F> Sink for Offset<F>
where
F: FnMut(u64, &[u8]) -> Result<bool, io::Error>,
{
type Error = io::Error;

fn matched(&mut self, _searcher: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, io::Error> {
// mat.absolute_bytes_offset() is the offset of the matched line
// mat.bytes() is the bytes of the matched line
(self.0)(mat.absolute_byte_offset(), mat.bytes())
}
}

#[cfg(test)]
fn search_ripgrep(file: &mut File, pattern: &str) -> Result<u64, io::Error> {
// Disable Unicode (\u flag) to search arbitrary (non-UTF-8) bytes
let matcher = if let Ok(matcher) = RegexMatcherBuilder::new().unicode(false).build(pattern) {
matcher
} else {
return Err(io::Error::from(ErrorKind::InvalidInput));
};

let mut offset = 0;
file.seek(SeekFrom::Start(0))?;
Searcher::new().search_file(
&matcher,
file,
Offset(|line_offset, bytes| {
// find pattern within the line and add onto line offset
// We are guaranteed to find a match, so the unwrap is OK.
let mymatch = matcher.find(bytes).unwrap().unwrap();
offset = line_offset + mymatch.start() as u64;
Ok(true)
}),
)?;

if offset != 0 {
Ok(offset)
} else {
Err(io::Error::from(ErrorKind::NotFound))
}
}

fn search_regex(file: &File, pattern: &str) -> Result<u64, io::Error> {
let mut buff = BufReader::new(file);
let mut bytes = vec![0; 1024];
Expand Down Expand Up @@ -416,6 +331,9 @@ fn main() {
#[cfg(test)]
mod tests {
use super::*;
use grep_matcher::Matcher;
use grep_regex::RegexMatcherBuilder;
use grep_searcher::{Searcher, Sink, SinkMatch};
use std::fs::File;
use time::Instant;

Expand Down Expand Up @@ -451,6 +369,86 @@ mod tests {
const MAGIC_NUMBER_ZSTD: &[u8] = b"\x28\xb5\x2f\xfd";
const PATTERN_OFFSET_VMLINUX_ZSTD: u64 = 16063;

fn search_bytes(file: &mut File, pattern: &[u8]) -> Result<u64, io::Error> {
let filelen = file.metadata()?.len();
let mut start = 0;
let mut offset: u64 = 0;

file.seek(SeekFrom::Start(0))?;
for b in file.bytes() {
if let Ok(b) = b {
if b == pattern[start] {
start += 1;
if start == pattern.len() {
// let offset point to the start of the pattern
offset -= start as u64 - 1;
break;
}
} else {
start = 0;
}
}
offset += 1;
}

if offset < filelen - pattern.len() as u64 {
Ok(offset)
} else {
Err(io::Error::from(ErrorKind::NotFound))
}
}

struct Offset<F>(F)
where
F: FnMut(u64, &[u8]) -> Result<bool, io::Error>;

impl<F> Sink for Offset<F>
where
F: FnMut(u64, &[u8]) -> Result<bool, io::Error>,
{
type Error = io::Error;

fn matched(
&mut self,
_searcher: &Searcher,
mat: &SinkMatch<'_>,
) -> Result<bool, io::Error> {
// mat.absolute_bytes_offset() is the offset of the matched line
// mat.bytes() is the bytes of the matched line
(self.0)(mat.absolute_byte_offset(), mat.bytes())
}
}

fn search_ripgrep(file: &mut File, pattern: &str) -> Result<u64, io::Error> {
// Disable Unicode (\u flag) to search arbitrary (non-UTF-8) bytes
let matcher = if let Ok(matcher) = RegexMatcherBuilder::new().unicode(false).build(pattern)
{
matcher
} else {
return Err(io::Error::from(ErrorKind::InvalidInput));
};

let mut offset = 0;
file.seek(SeekFrom::Start(0))?;
Searcher::new().search_file(
&matcher,
file,
Offset(|line_offset, bytes| {
// find pattern within the line and add onto line offset
// We are guaranteed to find a match, so the unwrap is OK.
let mymatch = matcher.find(bytes).unwrap().unwrap();
offset = line_offset + mymatch.start() as u64;
Ok(true)
}),
)?;

if offset != 0 {
Ok(offset)
} else {
Err(io::Error::from(ErrorKind::NotFound))
}
}

#[test]
fn test_search_bytes() {
let mut file = File::open(PATH_VMLINUX_RAW).unwrap();
Expand Down

0 comments on commit 3ed2015

Please sign in to comment.