From d39b3fbb1e482efef485e12c3366aba987b08c2e Mon Sep 17 00:00:00 2001 From: Kartik Gupta Date: Fri, 8 Sep 2023 23:05:03 +0530 Subject: [PATCH] tSNE vector compression optimised using wasm-bhtsne --- package-lock.json | 11 +++ package.json | 1 + src/components/VisualizeChart/index.jsx | 2 +- src/components/VisualizeChart/worker2.js | 109 +++++++++++++++++++++++ vite.config.js | 5 ++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 src/components/VisualizeChart/worker2.js diff --git a/package-lock.json b/package-lock.json index 325bc2e6..694ca5dd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -44,6 +44,7 @@ "react-router-dom": "^6.8.1", "vite": "^4.3.3", "vite-plugin-svgr": "^2.4.0", + "wasm-bhtsne": "^0.3.3", "web-vitals": "^2.1.4" }, "devDependencies": { @@ -6127,6 +6128,11 @@ "node": ">=14" } }, + "node_modules/wasm-bhtsne": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/wasm-bhtsne/-/wasm-bhtsne-0.3.3.tgz", + "integrity": "sha512-lFWGVJgEFf7PwskPPLEVrltWL0UqX+kDFFNxOfXPH6WyCXowAqtXov9fKGKnpjCDCVx6BQuHasj5vqEj5WJocA==" + }, "node_modules/web-vitals": { "version": "2.1.4", "license": "Apache-2.0" @@ -10162,6 +10168,11 @@ "xml-name-validator": "^4.0.0" } }, + "wasm-bhtsne": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/wasm-bhtsne/-/wasm-bhtsne-0.3.3.tgz", + "integrity": "sha512-lFWGVJgEFf7PwskPPLEVrltWL0UqX+kDFFNxOfXPH6WyCXowAqtXov9fKGKnpjCDCVx6BQuHasj5vqEj5WJocA==" + }, "web-vitals": { "version": "2.1.4" }, diff --git a/package.json b/package.json index 13a5f273..7d496bbb 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "react-router-dom": "^6.8.1", "vite": "^4.3.3", "vite-plugin-svgr": "^2.4.0", + "wasm-bhtsne": "^0.3.3", "web-vitals": "^2.1.4" }, "scripts": { diff --git a/src/components/VisualizeChart/index.jsx b/src/components/VisualizeChart/index.jsx index 9bf14380..22a036ba 100644 --- a/src/components/VisualizeChart/index.jsx +++ b/src/components/VisualizeChart/index.jsx @@ 
-123,7 +123,7 @@ const VisualizeChart = ({ scrollResult }) => {
       ],
     });
 
-    const worker = new Worker(new URL('./worker.js', import.meta.url), {
+    const worker = new Worker(new URL('./worker2.js', import.meta.url), {
       type: 'module',
     });
 
diff --git a/src/components/VisualizeChart/worker2.js b/src/components/VisualizeChart/worker2.js
new file mode 100644
index 00000000..7fca98f8
--- /dev/null
+++ b/src/components/VisualizeChart/worker2.js
@@ -0,0 +1,146 @@
+/* eslint-disable no-restricted-globals */
+
+import get from 'lodash/get';
+import init, { tSNE as Tsne } from 'wasm-bhtsne';
+
+// Minimum time in ms between two intermediate progress messages.
+const MESSAGE_INTERVAL = 200;
+// Number of barnes_hut(1) calls made per request.
+const TOTAL_ITERATIONS = 500;
+
+/**
+ * Reports a failure to the main thread with the `{ data: [], error }`
+ * shape used for every error this worker posts.
+ *
+ * @param {string} error - message to deliver
+ */
+function postError(error) {
+  self.postMessage({ data: [], error });
+}
+
+/**
+ * Pulls one vector per point out of the request.
+ *
+ * @param {object} request - message payload; reads `result.points` and,
+ *   when vectors are stored by name, `vector_name`
+ * @returns {{vectors: Array, error: (string|null)}} the vectors, or the
+ *   message to report when the request cannot be processed
+ */
+function collectVectors(request) {
+  const fail = (error) => ({ vectors: [], error });
+  const points = request?.result?.points;
+  // A missing points array used to throw a TypeError on points[0].
+  if (!Array.isArray(points) || points.length === 0) {
+    return fail('No data found');
+  }
+  if (points.length === 1) {
+    return fail('cannot perform tsne on single point');
+  }
+
+  const firstVector = points[0]?.vector;
+  if (typeof firstVector?.length === 'number') {
+    // Unnamed vectors: point.vector is the vector itself.
+    return { vectors: points.map((point) => point.vector), error: null };
+  }
+  // typeof null is 'object' as well, so null is ruled out explicitly.
+  if (typeof firstVector !== 'object' || firstVector === null) {
+    return fail('Unexpected Error Occured');
+  }
+
+  const vectorName = request.vector_name;
+  if (vectorName === undefined) {
+    return fail('No vector name found, select a vaild vector_name');
+  }
+  if (firstVector[vectorName] === undefined) {
+    return fail('No vector found with name ' + vectorName);
+  }
+  if (!firstVector[vectorName]) {
+    return fail('Unexpected Error Occured');
+  }
+  return {
+    vectors: points.map((point) => point.vector[vectorName]),
+    error: null,
+  };
+}
+
+/**
+ * Handles one request: validates it, runs t-SNE and posts intermediate
+ * and final datasets back to the main thread.
+ *
+ * @param {MessageEvent} e - event whose `data` is the request
+ */
+self.onmessage = async function (e) {
+  const request = e.data;
+  try {
+    const { vectors, error } = collectVectors(request);
+    if (error !== null) {
+      postError(error);
+      return;
+    }
+
+    await init();
+    const tsneencoder = new Tsne(vectors);
+    let lastPosted = Date.now();
+    let reduced = [];
+    for (let i = 0; i < TOTAL_ITERATIONS; i++) {
+      reduced = tsneencoder.barnes_hut(1);
+      // Throttle progress updates to one per MESSAGE_INTERVAL.
+      if (Date.now() - lastPosted > MESSAGE_INTERVAL) {
+        lastPosted = Date.now();
+        self.postMessage({ result: getDataset(request, reduced), error: null });
+      }
+    }
+    self.postMessage({ result: getDataset(request, reduced), error: null });
+  } catch (err) {
+    // A throw inside this async handler would otherwise only reject a promise
+    // within the worker, leaving the main thread waiting without any message.
+    postError(err?.message || 'Unexpected Error Occured');
+  }
+};
+
+/**
+ * Groups the reduced coordinates into chart datasets.
+ *
+ * @param {object} data - request; reads `color_by`, `labelByArrayUnique`
+ *   and `result.points`
+ * @param {Array} reducedPoint - coordinates indexed like `result.points`;
+ *   element `[0]` becomes `x` and `[1]` becomes `y`
+ * @returns {Array<{label: *, data: Array}>} one dataset per label, or a
+ *   single 'data' dataset when `color_by` is not set
+ */
+function getDataset(data, reducedPoint) {
+  const points = data.result?.points ?? [];
+  const toEntry = (point, index) => ({
+    x: reducedPoint[index][0],
+    y: reducedPoint[index][1],
+    point,
+  });
+
+  const labelby = data.color_by;
+  if (!labelby) {
+    return [{ label: 'data', data: points.map(toEntry) }];
+  }
+
+  const dataset = [];
+  // label -> dataset lookup, replacing an indexOf() scan for every point.
+  const groupByLabel = new Map();
+  const addGroup = (label) => {
+    const group = { label, data: [] };
+    dataset.push(group);
+    // First dataset wins for a repeated label, as indexOf() resolved it.
+    if (!groupByLabel.has(label)) {
+      groupByLabel.set(label, group);
+    }
+    return group;
+  };
+  (data.labelByArrayUnique ?? []).forEach((label) => addGroup(label));
+
+  points.forEach((point, index) => {
+    const label = get(point.payload, labelby);
+    // A label absent from labelByArrayUnique used to hit dataset[-1] and
+    // throw; it now gets a dataset of its own.
+    const group = groupByLabel.get(label) ?? addGroup(label);
+    group.data.push(toEntry(point, index));
+  });
+  return dataset;
+}
diff --git a/vite.config.js b/vite.config.js
index f4bd3e74..1733fb74 100644
--- a/vite.config.js
+++ b/vite.config.js
@@ -11,6 +11,11 @@ export default defineConfig({
   build: {
     outDir: 'dist',
   },
+  optimizeDeps: {
+    exclude: [
+      "wasm-bhtsne",
+    ],
+  },
   plugins: [
     reactRefresh(),
     svgrPlugin({