// openreplay/read-file.ts
// 2023-04-21 18:48:46 +02:00
//
// 173 lines
// 4.4 KiB
// TypeScript

import MFileReader from './frontend/app/player/web/messages/MFileReader';
import {
MType,
} from './frontend/app/player/web/messages/raw.gen';
import fs from 'fs'
// silent logger
// const logger = {
// log(){},
// error(){},
// warn(){},
// group(){},
// }
/**
 * Building block for reading big files by chunks: performs one `fs.read`
 * into a caller-supplied buffer.
 *
 * The call requests `sharedBuffer.length` bytes, written from offset 0 of the
 * buffer. The `position` argument is `null`, so the read starts at the
 * descriptor's current file position and consecutive calls walk forward
 * through the file.
 *
 * @param fd - An open file descriptor.
 * @param sharedBuffer - Destination buffer; its previous contents are overwritten.
 * @returns A promise that resolves with the number of bytes `fs.read` reports
 *   as read (this can be smaller than the buffer near the end of the file) and
 *   rejects with the error passed to the `fs.read` callback.
 */
function readBytes(fd: number, sharedBuffer: Uint8Array): Promise<number> {
  return new Promise((resolve, reject) => {
    fs.read(
      fd,
      sharedBuffer,
      0,
      sharedBuffer.length,
      null,
      (err, bytesRead) => {
        if (err) {
          reject(err);
          return;
        }
        // The original `resolve(void)` did not parse; report the byte count instead.
        resolve(bytesRead);
      }
    );
  });
}
/**
 * Async generator that walks a file front to back in pieces of at most
 * `size` bytes.
 *
 * One buffer of `size` bytes is allocated up front and reused for every read.
 * Each yielded value is a slice of that same buffer, so its contents are
 * overwritten by the next read: consume or copy a chunk before asking for the
 * next one.
 *
 * The number of chunks and the length of the final (possibly shorter) chunk
 * are derived from the file size reported by `fs.statSync` when iteration
 * starts.
 *
 * @param filePath - Path of the file to read.
 * @param size - Chunk size in bytes; must be a positive integer.
 * @throws RangeError when `size` is not a positive integer (a size of 0 would
 *   otherwise make the chunk count infinite).
 */
async function* readByChunks(filePath: string, size: number) {
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`readByChunks: size must be a positive integer, got ${size}`)
  }
  const sharedBuffer = Buffer.alloc(size)
  const stats = fs.statSync(filePath) // file details
  const chunkCount = Math.ceil(stats.size / size)
  const fd = fs.openSync(filePath, 'r') // file descriptor
  try {
    for (let i = 0; i < chunkCount; i++) {
      await readBytes(fd, sharedBuffer)
      // Every chunk is full-sized except possibly the last one, which only
      // holds the bytes remaining after the previous `i` full chunks.
      const end = Math.min(size, stats.size - i * size)
      yield sharedBuffer.slice(0, end)
    }
  } finally {
    // Runs on normal completion, on a read error, and when the consumer
    // stops iterating early, so the descriptor is never leaked.
    fs.closeSync(fd)
  }
}
/* ==== end chunk-reader === */
/*== Message generators ==*/
/**
 * Async generator producing the messages of a large file without loading it
 * all at once.
 *
 * The file is pulled in through `readByChunks`; every chunk is appended to a
 * single `MFileReader`, which is then drained by calling `readNext()` until it
 * returns a falsy value. After each chunk has been drained, its 1-based
 * ordinal is logged to the console.
 *
 * @param file - Path of the file to read.
 * @param chunkSize - Number of bytes requested per chunk.
 */
async function* readBigFileMessages(file: string, chunkSize: number) {
  const reader = new MFileReader(new Uint8Array(), 0)
  let chunkNo = 0
  for await (const bytes of readByChunks(file, chunkSize)) {
    chunkNo += 1
    reader.append(bytes)
    for (let message = reader.readNext(); message; message = reader.readNext()) {
      yield message
    }
    console.log("Read chunk: ", chunkNo)
  }
}
/**
 * Generator that reads two files fully into memory and yields the messages of
 * their concatenation.
 *
 * An `MFileReader` is created over the first file's bytes, the second file's
 * bytes are appended, and `readNext()` is called until it returns a falsy
 * value. The byte length of the first file is logged before reading starts.
 *
 * @param filename1 - Path of the file whose bytes come first.
 * @param filename2 - Path of the file whose bytes are appended.
 * @returns Yields `[message, position]` pairs, where `position` is what
 *   `getPosition()` returns right after the message was read.
 */
function* readMessagesTwoFiles(filename1: string, filename2: string) {
  const firstBytes = fs.readFileSync(filename1)
  const secondBytes = fs.readFileSync(filename2)
  console.log("First file: ", firstBytes.length, " bytes")

  const reader = new MFileReader(firstBytes, 0 )
  reader.append(secondBytes)

  for (;;) {
    const message = reader.readNext()
    if (!message) {
      return
    }
    yield [ message, reader.getPosition() ]
  }
}
/*== end message generators ==*/
/**
 * Adds `n` to the counter stored under `key` in a plain-object map, creating
 * the counter when it does not exist yet.
 *
 * Only the map's own properties count as existing entries. Without that check
 * a key such as "constructor" or "toString" would pick up the inherited
 * `Object.prototype` member and the "sum" would become a string concatenation.
 * As before, an existing falsy value (0, NaN, undefined) is replaced by `n`.
 *
 * NOTE: the key "__proto__" still cannot be counted on a map created with `{}`.
 *
 * @param map - Object used as a string-keyed dictionary of counters; mutated in place.
 * @param key - Entry to increment.
 * @param n - Amount to add (defaults to 1).
 */
function addToMap(map: Record<string, number>, key: string | number, n = 1) {
  const hasOwnEntry = Object.prototype.hasOwnProperty.call(map, key)
  map[key] = hasOwnEntry && map[key] ? map[key] + n : n
}
/**
 * Rough size estimate for a message object, based on its own enumerable
 * property values:
 *  - each string value contributes its length plus 1,
 *  - each number value contributes 2,
 *  - values of any other type contribute nothing.
 *
 * @param msg - Object whose values are inspected (keys are ignored).
 * @returns The summed estimate; 0 for an object without string/number values.
 */
function estimateSize(msg) {
  let total = 0
  for (const field of Object.values(msg)) {
    switch (typeof field) {
      case "string":
        total += field.length + 1
        break
      case "number":
        total += 2
        break
      default:
        break
    }
  }
  return total
}
// Module-level accumulators. Apart from the two calcStrMapStats() calls at the
// bottom of the file, they are only referenced from the commented-out
// analysis code inside main(), so they currently stay empty.

// Counter keyed by msg.tp (commented-out code adds 1 per message).
const mapByTp = {}
// Keyed by msg.tp; commented-out code adds estimateSize(msg) per message.
const mapBySize = {}
// String-value counters for messages with tp === 12 (per the commented-out code).
const stringRepeatMapAttrs = {}
// Nested maps keyed by msg.name / msg.value, counting msg.id (per the commented-out code).
const stringRepeatMapAttrsNodes = {}
// String-value counters for all messages whose tp is not 12 (per the commented-out code).
const stringRepeatMapOthers = {}
/**
 * Counts every string-typed property value of `msg` in `map`.
 *
 * Each string value found among the message's own enumerable properties is
 * passed to `addToMap` with the default increment; values of other types are
 * skipped.
 *
 * @param map - Counter map to update in place.
 * @param msg - Object whose values are scanned.
 */
function updateStringsMap(map, msg){
  for (const fieldValue of Object.values(msg)) {
    if (typeof fieldValue !== "string") {
      continue
    }
    addToMap(map, fieldValue)
  }
}
// Chunk size, in bytes, that main() passes to readBigFileMessages().
const CHUNK_SIZE = 100000000; // 100MB (10^8 bytes)
// Input file that main() reads; a relative path.
const FILE = "../decrypted0.mob"
// The two variables below are only referenced from the commented-out code in
// main(): lastI would track the last numeric msg._index seen, currentPageUrl
// the url of the last message with tp === 4.
let lastI = 0
let currentPageUrl = ""
/**
 * Script entry point: reads FILE in CHUNK_SIZE-byte chunks through
 * readBigFileMessages() and prints every message to the console.
 *
 * The statistics gathering that used to run per message is kept below as
 * commented-out code.
 *
 * @returns A promise that settles once the whole file has been processed;
 *   it rejects if reading or decoding throws.
 */
async function main(): Promise<void> {
  // readBigFileMessages() is an async generator, so it must be consumed with
  // `for await`; a plain `for...of` fails because the object is not iterable.
  for await (const msg of readBigFileMessages(FILE, CHUNK_SIZE)) {
    console.log(msg)
    // const index = msg._index
    // lastI = isNaN(index) ? lastI : index
    // addToMap(mapByTp, msg.tp)
    // addToMap(mapBySize, msg.tp, estimateSize(msg))
    // if (msg.tp === 4) {
    // currentPageUrl = msg.url
    // }
    // if (msg.tp === 12) {
    // if (!stringRepeatMapAttrsNodes[msg.name]) {
    // stringRepeatMapAttrsNodes[msg.name] = {}
    // }
    // if (!stringRepeatMapAttrsNodes[msg.value]) {
    // stringRepeatMapAttrsNodes[msg.value] = {}
    // }
    // addToMap(stringRepeatMapAttrsNodes[msg.name], msg.id)
    // addToMap(stringRepeatMapAttrsNodes[msg.value], msg.id)
    // updateStringsMap(stringRepeatMapAttrs, msg)
    // } else { updateStringsMap(stringRepeatMapOthers, msg)}
  }
}
/**
 * Computes summary figures for a map of `string -> occurrence count`.
 *
 * With `count` the stored value and `len` the key's length:
 *  - `keySize`  = log10(number of distinct strings), or 0 for an empty map
 *                 (previously `Math.log10(0)` leaked `-Infinity` to the caller);
 *  - `strSum`   = sum of `len * count`  (characters across all occurrences);
 *  - `redSum`   = sum of `count * keySize`;
 *  - `dictSize` = sum of `len + keySize`;
 *  - `topStringMap` = the (at most) 10 entries with the largest `len * count`.
 *
 * Presumably `redSum + dictSize` estimates the size after replacing repeated
 * strings with dictionary references. TODO confirm intent.
 *
 * @param strMap - Map from string to the number of times it was seen; not modified.
 * @returns `[strSum, redSum, dictSize, keySize, topStringMap]`.
 */
function calcStrMapStats(
  strMap: Record<string, number>
): [number, number, number, number, Record<string, number>] {
  const stringEntries = Object.entries(strMap)

  // Ascending by total weight, so the heaviest entries sit at the end.
  // Sorting this local array in place keeps the summation order used below.
  stringEntries.sort(([k1, v1], [k2, v2]) => v1 * k1.length - v2 * k2.length)

  const topStringMap: Record<string, number> = {}
  for (const [key, count] of stringEntries.slice(-10)) {
    topStringMap[key] = count
  }

  const keySize = stringEntries.length > 0 ? Math.log10(stringEntries.length) : 0

  let strSum = 0
  let redSum = 0
  let dictSize = 0
  for (const [key, count] of stringEntries) {
    strSum = strSum + key.length * count
    redSum = redSum + count * keySize
    dictSize = dictSize + key.length + keySize
  }

  return [ strSum, redSum, dictSize, keySize, topStringMap ]
}
// NOTE(review): these run at module load. The main() call below is commented
// out and the code that fills the maps is commented out as well, so both
// calls currently operate on empty maps. Even with main() re-enabled at this
// position, the stats would be computed before the async main() has read
// anything.
const statsAttr = calcStrMapStats(stringRepeatMapAttrs)
const statsOthers = calcStrMapStats(stringRepeatMapOthers)
// main()