Skip to content

Commit

Permalink
wip - v2a
Browse files Browse the repository at this point in the history
  • Loading branch information
zbjornson committed Jan 31, 2025
1 parent 028d7e6 commit 596ebaa
Show file tree
Hide file tree
Showing 5 changed files with 374 additions and 5 deletions.
14 changes: 14 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@ export class Transcoder {
* @param p Instance of PopulateInfo if populating paths.
*/
constructor(p?: PopulateInfo);

/**
* Transcodes the BSON buffer `b` into a JSON string stored in a Buffer.
* @param b BSON buffer
*/
transcode(b: Uint8Array): Buffer;

/**
 * Finds all ObjectIds in `b` that don't have a corresponding object in `p`
 * (the `PopulateInfo` instance given to the constructor). Nothing is
 * returned; the IDs found are retrieved per path with
 * `PopulateInfo#getMissingIdsForPath()`.
 * @param b BSON buffer.
 */
getMissingIds(b: Uint8Array): void;
}

export class PopulateInfo {
Expand All @@ -17,10 +24,17 @@ export class PopulateInfo {
* @param items BSON buffers to populate the path with.
*/
addItems(path: string, items: Buffer[]): void;

/**
* Reuses objects for one path for another path.
* @param path1 The path that's already had `addItems()` called for it.
* @param path2 The path to reuse that path's items on.
*/
repeatPath(path1: string, path2: string): void;

/**
 * Returns an array of unique IDs that are missing for a path.
 *
 * IDs are recorded by `Transcoder#getMissingIds()`. An ID is dropped from
 * the list again when a later `addItems()` call for the same path supplies
 * an item with that ID. Each returned Buffer is a copy of the ID's bytes.
 * An empty array is returned when nothing has been recorded for `path`.
 * @param path The path to get missing IDs for.
 */
getMissingIdsForPath(path: string): Buffer[];
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"@types/node": "^22.10.7",
"beautify-benchmark": "^0.2.4",
"benchmark": "^2.1.4",
"bson": "^4.0.2",
"bson": "^6.10.2",
"mocha": "^11.0.1",
"mongodb": "^3.5.6"
},
Expand Down
189 changes: 187 additions & 2 deletions src/bson-to-json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <ctime> // gmtime
#include <cmath> // isfinite
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <array>
#include "napi.h"
Expand Down Expand Up @@ -176,14 +177,16 @@ struct SizedBuffer {
};

using ObjectIdMap = std::unordered_map<ObjectId, SizedBuffer, ObjectIdHasher, ObjectIdEquals>;
using ObjectIdSet = std::unordered_set<ObjectId, ObjectIdHasher, ObjectIdEquals>;

template <ISA isa>
class PopulateInfo : public Napi::ObjectWrap<PopulateInfo<isa> > {
public:
static Napi::Object Init(Napi::Env env, Napi::Object exports) {
Napi::Function func = Napi::ObjectWrap<PopulateInfo<isa>>::DefineClass(env, "PopulateInfo", {
Napi::ObjectWrap<PopulateInfo<isa>>::template InstanceMethod<&PopulateInfo<isa>::AddItems>("addItems"),
Napi::ObjectWrap<PopulateInfo<isa>>::template InstanceMethod<&PopulateInfo<isa>::RepeatPath>("repeatPath")
Napi::ObjectWrap<PopulateInfo<isa>>::template InstanceMethod<&PopulateInfo<isa>::RepeatPath>("repeatPath"),
Napi::ObjectWrap<PopulateInfo<isa>>::template InstanceMethod<&PopulateInfo<isa>::GetMissingIdsForPath>("getMissingIdsForPath")
});

Napi::FunctionReference* constructor = new Napi::FunctionReference();
Expand Down Expand Up @@ -252,8 +255,27 @@ class PopulateInfo : public Napi::ObjectWrap<PopulateInfo<isa> > {
paths[path2] = it->second;
}

/**
 * JS-facing accessor for the IDs recorded in `missingIds` under one path.
 * Expects the path as the first argument (a JS string) and returns a JS
 * array holding one Buffer copy per recorded ObjectId; the array is empty
 * when the path has no entry in `missingIds`.
 */
Napi::Value GetMissingIdsForPath(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();
    const std::string requestedPath = info[0].As<Napi::String>().Utf8Value();

    const auto entry = missingIds.find(requestedPath);
    if (entry != missingIds.end()) {
        const ObjectIdSet& ids = entry->second;
        Napi::Array result = Napi::Array::New(env, ids.size());

        // Unordered set: the order of the returned IDs is whatever the
        // container's iteration order happens to be.
        size_t slot = 0;
        for (const auto& oid : ids) {
            Napi::Buffer<uint8_t> copy = Napi::Buffer<uint8_t>::Copy(env, oid.data(), oid.size());
            result.Set(slot, copy);
            slot++;
        }
        return result;
    }

    // Nothing recorded for this path.
    return Napi::Array::New(env, 0);
}

// TODO(perf) can this use string_view?
std::unordered_map<std::string, ObjectIdMap> paths;
std::unordered_map<std::string, ObjectIdSet> missingIds;
};

template<ISA isa>
Expand All @@ -271,7 +293,8 @@ class Transcoder : public Napi::ObjectWrap<Transcoder<isa> > {

static Napi::Object Init(Napi::Env env, Napi::Object exports) {
Napi::Function func = Napi::ObjectWrap<Transcoder<isa> >::DefineClass(env, "Transcoder", {
Napi::ObjectWrap<Transcoder<isa> >::template InstanceMethod<&Transcoder<isa>::transcodeNodeFn>("transcode")
Napi::ObjectWrap<Transcoder<isa> >::template InstanceMethod<&Transcoder<isa>::transcodeNodeFn>("transcode"),
Napi::ObjectWrap<Transcoder<isa> >::template InstanceMethod<&Transcoder<isa>::getMissingIdsNodeFn>("getMissingIds")
});

ctor = new Napi::FunctionReference();
Expand All @@ -291,6 +314,40 @@ class Transcoder : public Napi::ObjectWrap<Transcoder<isa> > {
}
}

/**
 * JS binding behind `Transcoder#getMissingIds`.
 *
 * Validates that the first argument is a Uint8Array of at least 5 bytes,
 * points the transcoder's input cursor (`in`, `inLen`, `inIdx`) at it and
 * runs `getMissingIds()` over the top-level document. Any failure is
 * surfaced to JS as a thrown Error; nothing is returned on success.
 */
void getMissingIdsNodeFn(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

    // The typed-array kind may only be queried once we know it is one.
    const bool isUint8Array = info[0].IsTypedArray() &&
        info[0].As<Napi::TypedArray>().TypedArrayType() == napi_uint8_array;
    if (!isUint8Array) {
        Napi::Error::New(env, "Input must be a buffer").ThrowAsJavaScriptException();
        return;
    }

    Napi::Uint8Array bytes = info[0].As<Napi::Uint8Array>();
    in = bytes.Data();
    inLen = bytes.ByteLength();
    inIdx = 0;

    if (UNLIKELY(inLen < 5)) {
        Napi::Error::New(env, "Input buffer must have length >= 5").ThrowAsJavaScriptException();
        return;
    }

    // getMissingIds() reports failure by returning true, with the message in `err`.
    const bool failed = getMissingIds(false);
    if (failed) {
        Napi::Error::New(env, err).ThrowAsJavaScriptException();
    }
}

/**
* Transcodes the BSON document to JSON.
* @param in_ BSON document.
Expand Down Expand Up @@ -967,6 +1024,132 @@ class Transcoder : public Napi::ObjectWrap<Transcoder<isa> > {
out[outIdx++] = '"';
}

bool getMissingIds(
bool isArray,
std::string baseKey = ""
) {
const int32_t size = readLE<int32_t>();
if (UNLIKELY(size < 5))
RETURN_ERR("BSON size must be >= 5");

if (UNLIKELY(size + inIdx - 4 > inLen))
RETURN_ERR("BSON size exceeds input length");

int32_t arrIdx = 0;

while (true) {
const uint8_t elementType = in[inIdx++];
if (UNLIKELY(elementType == 0))
break;

if (isArray) {
inIdx += nDigits(arrIdx);
} else {
size_t keyStart = inIdx;
size_t keyEnd = inIdx;
while (in[keyEnd] != 0 && keyEnd < inLen)
keyEnd++;

if (keyEnd >= inLen)
RETURN_ERR("Truncated BSON (in key)");

inIdx = keyEnd;
currentPath = baseKey.empty() ?
std::string(in + keyStart, in + inIdx) :
baseKey + "." + std::string(in + keyStart, in + inIdx);
inIdx++; // skip null terminator
}

switch (elementType) {
case BSON_DATA_STRING: {
const int32_t size = readLE<int32_t>();
if (UNLIKELY(size <= 0 || static_cast<size_t>(size) > inLen - inIdx))
RETURN_ERR("Bad string length");
inIdx += size;
break;
}
case BSON_DATA_OID: {
if (LIKELY(inIdx + 12 <= inLen)) {
if (populateInfo) {
auto idMapForPath = populateInfo->paths.find(currentPath);
if (idMapForPath != populateInfo->paths.end()) {
ObjectId id;
memcpy(id.data(), in + inIdx, 12);
auto doc = idMapForPath->second.find(id);
if (doc == idMapForPath->second.end()) {
populateInfo->missingIds.try_emplace(currentPath, ObjectIdSet())
.first->second.insert(id);
}
}
}

inIdx += 12;
break;
} else
RETURN_ERR("Truncated BSON (in ObjectId)");
break;
}
case BSON_DATA_INT: {
inIdx += 4;
if (UNLIKELY(inIdx > inLen))
RETURN_ERR("Truncated BSON (in Int)");
break;
}
case BSON_DATA_NUMBER:
case BSON_DATA_DATE:
case BSON_DATA_LONG: {
inIdx += 8;
if (UNLIKELY(inIdx > inLen))
RETURN_ERR("Truncated BSON");
break;
}
case BSON_DATA_BOOLEAN: {
inIdx++;
if (UNLIKELY(inIdx > inLen))
RETURN_ERR("Truncated BSON (in Boolean)");
break;
}
case BSON_DATA_OBJECT: {
// Bounds check in head of this function.
if (UNLIKELY((getUnknownIds(false, currentPath))))
return true;
break;
}
case BSON_DATA_ARRAY: {
// Bounds check in head of this function.
if (UNLIKELY((getUnknownIds(true, currentPath))))
return true;
if (UNLIKELY(in[inIdx - 1] != 0)) {
err = "Invalid array terminator byte";
return true;
}
break;
}
case BSON_DATA_NULL:
case BSON_DATA_UNDEFINED: {
break;
}
case BSON_DATA_DECIMAL128:
case BSON_DATA_BINARY:
case BSON_DATA_REGEXP:
case BSON_DATA_SYMBOL:
case BSON_DATA_TIMESTAMP:
case BSON_DATA_MIN_KEY:
case BSON_DATA_MAX_KEY:
case BSON_DATA_CODE:
case BSON_DATA_CODE_W_SCOPE:
case BSON_DATA_DBPOINTER:
RETURN_ERR("BSON type incompatible with JSON");
default:
RETURN_ERR("Unknown BSON type");
}

arrIdx++;
}

return false;
}

bool transcodeObject(
bool isArray,
std::string baseKey = ""
Expand Down Expand Up @@ -1227,6 +1410,7 @@ void PopulateInfo<isa>::AddItems(const Napi::CallbackInfo& info) {
uint32_t nBuffers = buffers.Length();
paths.try_emplace(path.Utf8Value(), ObjectIdMap());
ObjectIdMap& map = paths[path.Utf8Value()];
ObjectIdSet& set = missingIds[path.Utf8Value()];

Napi::Object wrapedTranscoder = Transcoder<isa>::ctor->New({});
Transcoder<isa>* trans = Transcoder<isa>::Unwrap(wrapedTranscoder);
Expand All @@ -1250,6 +1434,7 @@ void PopulateInfo<isa>::AddItems(const Napi::CallbackInfo& info) {
}
std::memcpy(sb.data, trans->out, sb.size);
map[trans->docId] = sb;
set.erase(trans->docId);
}
}

Expand Down
Loading

0 comments on commit 596ebaa

Please sign in to comment.