diff --git a/frontend/app/player/web/messages/MFileReader.ts b/frontend/app/player/web/messages/MFileReader.ts
index d1b131595..5f519f276 100644
--- a/frontend/app/player/web/messages/MFileReader.ts
+++ b/frontend/app/player/web/messages/MFileReader.ts
@@ -16,6 +16,11 @@ export default class MFileReader extends RawMessageReader {
     super(data)
   }
 
+  /** Exposes the reader's current position (`this.p`). */
+  getPosition() {
+    return this.p
+  }
+
   private needSkipMessage(): boolean {
     if (this.p === 0) return false
     for (let i = 7; i >= 0; i--) {
diff --git a/read-file.ts b/read-file.ts
new file mode 100644
index 000000000..1621777f8
--- /dev/null
+++ b/read-file.ts
@@ -0,0 +1,242 @@
+import MFileReader from './frontend/app/player/web/messages/MFileReader';
+import {
+  MType,
+} from './frontend/app/player/web/messages/raw.gen';
+
+import fs from 'fs'
+
+
+// silent logger
+// const logger = {
+//   log(){},
+//   error(){},
+//   warn(){},
+//   group(){},
+// }
+
+/** String-keyed counter dictionary used by the statistics helpers. */
+type CountMap = Record<string, number>
+
+/* ==== chunk-reader: for reading big files by chunks ==== */
+/**
+ * Fills `sharedBuffer` from the current position of `fd`.
+ *
+ * @param fd open file descriptor to read from
+ * @param sharedBuffer destination; at most `sharedBuffer.length` bytes are read
+ * @returns the number of bytes actually read (0 at end of file)
+ */
+function readBytes(fd: number, sharedBuffer: Buffer): Promise<number> {
+  return new Promise((resolve, reject) => {
+    fs.read(
+      fd,
+      sharedBuffer,
+      0,
+      sharedBuffer.length,
+      null, // null position: continue from the descriptor's current offset
+      (err, bytesRead) => {
+        if (err) { return reject(err); }
+        resolve(bytesRead)
+      }
+    )
+  })
+}
+
+/**
+ * Yields the content of `filePath` in chunks of at most `size` bytes.
+ *
+ * Every chunk is a view over one shared buffer that the next read overwrites,
+ * so a consumer must finish with (or copy) a chunk before asking for the next.
+ *
+ * @param filePath path of the file to read
+ * @param size chunk size in bytes; must be a positive integer
+ * @throws RangeError when `size` is not a positive integer
+ */
+async function* readByChunks(filePath: string, size: number): AsyncGenerator<Buffer> {
+  if (!Number.isInteger(size) || size <= 0) {
+    throw new RangeError(`chunk size must be a positive integer, got ${size}`)
+  }
+  const sharedBuffer = Buffer.alloc(size);
+  const fd = fs.openSync(filePath, 'r'); // file descriptor
+  try {
+    while (true) {
+      // Use the byte count reported by fs.read rather than deriving it from
+      // the file size, so short reads and a changing file size are handled.
+      const bytesRead = await readBytes(fd, sharedBuffer);
+      if (bytesRead === 0) { break; } // end of file
+      yield sharedBuffer.subarray(0, bytesRead);
+    }
+  } finally {
+    // Runs on completion, on errors and when the consumer stops early.
+    fs.closeSync(fd);
+  }
+}
+/* ==== end chunk-reader === */
+
+/*== Message generators ==*/
+/**
+ * Reads `file` chunk by chunk through one MFileReader and yields every value
+ * `readNext()` returns; logs the chunk counter after each drained chunk.
+ *
+ * @param file path of the file to read
+ * @param chunkSize number of bytes requested per read
+ */
+async function* readBigFileMessages(file: string, chunkSize: number) {
+  let i = 0
+  const fileReader = new MFileReader(new Uint8Array(), 0)
+  for await (const chunk of readByChunks(file, chunkSize)) {
+    i++
+    fileReader.append(chunk)
+    let msg
+    while (msg = fileReader.readNext()) {
+      yield msg
+    }
+    console.log("Read chunk: ", i)
+  }
+}
+
+/**
+ * Loads two files fully into memory, feeds both to one MFileReader and
+ * yields `[message, readerPosition]` pairs.
+ *
+ * @param filename1 file the reader is created with
+ * @param filename2 file appended to the reader afterwards
+ */
+function* readMessagesTwoFiles(filename1: string, filename2: string) {
+  const file1 = fs.readFileSync(filename1)
+  const file2 = fs.readFileSync(filename2)
+  console.log("First file: ", file1.length, " bytes")
+  const fileReader = new MFileReader(file1, 0)
+  fileReader.append(file2)
+  let msg
+  while (msg = fileReader.readNext()) {
+    yield [ msg, fileReader.getPosition() ]
+  }
+}
+/*== end message generators ==*/
+
+
+
+/**
+ * Adds `n` to the counter stored under `key`, starting from 0 when absent.
+ *
+ * Only own properties count as existing entries, so keys that collide with
+ * inherited members such as "constructor" are counted correctly.
+ */
+function addToMap(map: CountMap, key: string | number, n = 1) {
+  const isKnown = Object.prototype.hasOwnProperty.call(map, key)
+  map[key] = (isKnown ? (map[key] ?? 0) : 0) + n
+}
+
+/**
+ * Rough size estimate of a message: each string field counts as its length
+ * plus one, each number field counts as two, other fields are ignored.
+ */
+function estimateSize(msg: object): number {
+  return Object.values(msg).reduce((prevSum: number, val: unknown) => {
+    if (typeof val === "string") {
+      return prevSum + val.length + 1
+    }
+    if (typeof val === "number") {
+      return prevSum + 2
+    }
+    return prevSum
+  }, 0)
+}
+
+// Null-prototype objects, so any string (even "__proto__") is a safe key.
+const mapByTp: CountMap = Object.create(null)
+const mapBySize: CountMap = Object.create(null)
+const stringRepeatMapAttrs: CountMap = Object.create(null)
+const stringRepeatMapAttrsNodes: Record<string, CountMap> = Object.create(null)
+const stringRepeatMapOthers: CountMap = Object.create(null)
+
+/** Counts every string-valued field of `msg` in `map`. */
+function updateStringsMap(map: CountMap, msg: object) {
+  Object.values(msg).forEach(val => {
+    if (typeof val === "string") {
+      addToMap(map, val)
+    }
+  })
+}
+
+
+const CHUNK_SIZE = 100000000; // 100MB
+const FILE = "../decrypted0.mob"
+
+let lastI = 0
+let currentPageUrl = ""
+
+/**
+ * Prints every message of FILE. The statistics gathering inside the loop is
+ * currently commented out.
+ */
+async function main() {
+  // readBigFileMessages is an async generator, hence `for await`.
+  for await (const msg of readBigFileMessages(FILE, CHUNK_SIZE)) {
+    console.log(msg)
+
+    // const index = msg._index
+    // lastI = isNaN(index) ? lastI : index
+
+    // addToMap(mapByTp, msg.tp)
+    // addToMap(mapBySize, msg.tp, estimateSize(msg))
+
+    // if (msg.tp === 4) {
+    //   currentPageUrl = msg.url
+    // }
+
+    // if (msg.tp === 12) {
+
+    //   if (!stringRepeatMapAttrsNodes[msg.name]) {
+    //     stringRepeatMapAttrsNodes[msg.name] = {}
+    //   }
+    //   if (!stringRepeatMapAttrsNodes[msg.value]) {
+    //     stringRepeatMapAttrsNodes[msg.value] = {}
+    //   }
+
+    //   addToMap(stringRepeatMapAttrsNodes[msg.name], msg.id)
+    //   addToMap(stringRepeatMapAttrsNodes[msg.value], msg.id)
+
+    //   updateStringsMap(stringRepeatMapAttrs, msg)
+    // } else { updateStringsMap(stringRepeatMapOthers, msg)}
+
+  }
+}
+
+/**
+ * Summarises a string-frequency map.
+ *
+ * @param strMap string -> number of occurrences
+ * @returns `[strSum, redSum, dictSize, keySize, topStringMap]`:
+ *   strSum = sum of length * count, redSum = sum of count * keySize,
+ *   dictSize = sum of length + keySize, keySize = log10 of the number of
+ *   distinct strings (0 for an empty map), topStringMap = the 10 entries
+ *   with the largest length * count.
+ */
+function calcStrMapStats(strMap: CountMap): [number, number, number, number, CountMap] {
+  const topStringMap: CountMap = Object.create(null)
+  const stringEntries = Object.entries(strMap)
+  stringEntries.sort(([k1, v1], [k2, v2]) => v1 * k1.length - v2 * k2.length)
+  for (const [key, val] of stringEntries.slice(-10)) {
+    topStringMap[key] = val
+  }
+
+  // log10(0) is -Infinity; report a key size of 0 for an empty map instead.
+  const keySize = stringEntries.length > 0 ? Math.log10(stringEntries.length) : 0
+
+  const strSum = stringEntries
+    .reduce((s, [k, v]) => s + k.length * v, 0)
+  const redSum = stringEntries
+    .reduce((s, [, v]) => s + v * keySize, 0)
+  const dictSize = stringEntries
+    .reduce((s, [k]) => s + k.length + keySize, 0)
+  return [ strSum, redSum, dictSize, keySize, topStringMap ]
+}
+
+// NOTE: both are evaluated at module load, before any message has been
+// counted, so they currently describe empty maps.
+const statsAttr = calcStrMapStats(stringRepeatMapAttrs)
+const statsOthers = calcStrMapStats(stringRepeatMapOthers)
+
+
+// main()