The viewers can already decode these encodings automatically, so I would like to request that StringTools' decoding be enhanced to do the same (instead of everyone having to study redundant JScript code).
Test script
/**
 * Test button: for every selected file in the source tab, read its content,
 * decode it with StringTools.Decode(..., "auto") and write the (truncated)
 * result to the script output.
 *
 * @param {object} clickData Click data passed in by the host; only
 *     clickData.func.sourcetab.selected_files is used.
 */
function OnClick(clickData) {
    var tab = clickData.func.sourcetab;
    var fsu = DOpus.FSUtil();
    var ST = DOpus.Create.StringTools();
    for (var e = new Enumerator(tab.selected_files); !e.atEnd(); e.moveNext()) {
        var file = e.item();
        var filePath = file.realpath;
        if (!fsu.Exists(filePath)) {
            continue;
        }
        var openText = fsu.OpenFile(filePath, "r");
        var text;
        try {
            text = openText.Read();
        } finally {
            // Close must be invoked with parentheses; doing it in `finally`
            // releases the handle even when Read() throws.
            openText.Close();
        }
        // "auto" is the mode under test; the comparison run used ST.Decode(text, "utf8").
        var textDecode = ST.Decode(text, "auto");
        var textContent = ST.Truncate(textDecode, 2048, 0);
        DOpus.Output(textContent);
    }
}
Result:
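| Format         | Decode: utf-8 | Decode: auto                              |
|----------------|---------------|-------------------------------------------|
| UTF-8          | √             | × (throws an error; same for ANSI files)  |
| UTF-8 with BOM | √             | √                                         |
| UTF-16 BE      | ×             | √                                         |
| UTF-16 LE      | ×             | √                                         |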
The script below tries to recognize more code pages, but the more code pages it checks, the more detection errors it causes.
I therefore removed some code pages. In addition to the encodings listed above, it still supports gb2312, big5 and Shift-JIS (which ST.Decode apparently cannot recognize?), and that is enough for me.
/**
 * Button entry point: for every selected file in the source tab, detect its
 * encoding, report whether that matches the system ANSI encoding, then decode
 * the file with the detected encoding and output the (truncated) text.
 *
 * @param {object} clickData Click data passed in by the host; only
 *     clickData.func.sourcetab.selected_files is used.
 */
function OnClick(clickData) {
    var tab = clickData.func.sourcetab;
    var fsu = DOpus.FSUtil();
    var ST = DOpus.Create.StringTools();
    // The system ANSI encoding does not depend on the file, so look it up once
    // instead of once per selected file.
    var sysEnc = getSystemANSIEncoding();

    // Loop through all selected files in the source tab
    for (var e = new Enumerator(tab.selected_files); !e.atEnd(); e.moveNext()) {
        var file = e.item();
        var filePath = file.realpath;
        // Skip entries that no longer exist
        if (!fsu.Exists(filePath)) {
            continue;
        }

        var encoding = detectFileEncoding(filePath);
        DOpus.Output('File Encoding: ' + encoding + '\nIs ANSI: ' + (encoding === sysEnc ? "Yes" : "No"));

        // Read the file content; `finally` guarantees the handle is released
        // even when Read() throws.
        var openText = fsu.OpenFile(filePath, "r");
        var bytes;
        try {
            bytes = openText.Read();
        } finally {
            openText.Close();
        }

        var textDecode = ST.Decode(bytes, encoding);
        var textContent = ST.Truncate(textDecode, 2048, 0);
        DOpus.Output('Content:\n' + textContent);
    }
}
/**
 * Guess the text encoding of a file from its BOM and, failing that, from byte
 * patterns in its first 4096 bytes.
 *
 * The UTF-8 check and the double-byte check each scan the buffer on their own,
 * because the two families align their multi-byte characters differently; a
 * shared cursor would feed misaligned bytes to one of them.
 *
 * @param {string} fp Path of the file to inspect.
 * @returns {string} One of "UTF-8", "UTF-16 LE", "UTF-16 BE", "Shift_JIS",
 *     "big5", "gb2312", or whatever getSystemANSIEncoding() returns. "UTF-8"
 *     is also the fallback when the file is empty or cannot be read.
 *     "Shift_JIS" is spelled exactly as getSystemANSIEncoding() spells it so
 *     that callers can compare the two results directly.
 */
function detectFileEncoding(fp) {
    try {
        var bytes = readLeadingBytes(fp, 4096);

        // ================== BOM Detection ==================
        if (bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF) return "UTF-8";
        if (bytes[0] === 0xFF && bytes[1] === 0xFE) return "UTF-16 LE";
        if (bytes[0] === 0xFE && bytes[1] === 0xFF) return "UTF-16 BE";

        // ================== Encoding Feature Detection ==================
        var sysEnc = getSystemANSIEncoding();
        var utf8 = scoreUtf8(bytes);
        var dbcs = scoreDoubleByte(bytes);

        // ================== Dynamic Judgment Logic ==================
        var maxScore = Math.max(utf8, dbcs.gbk, dbcs.big5, dbcs.sjis);
        if (maxScore < 2) return sysEnc; // No significant features
        if (dbcs.sjis >= 3 && dbcs.sjis >= maxScore * 0.8) return "Shift_JIS";
        if (dbcs.big5 >= 3 && dbcs.big5 >= maxScore * 0.8) return "big5";
        if (utf8 >= 3 && utf8 >= maxScore * 0.9) return "UTF-8";
        // GBK evidence is only trusted on a gb2312 system, which makes this
        // rule currently equivalent to the fallback below.
        if (dbcs.gbk >= 2 && sysEnc === "gb2312") return "gb2312";
        return sysEnc; // Default fallback
    } catch (e) {
        // Best effort: an empty or unreadable file is reported as UTF-8.
        return "UTF-8";
    }
}

/**
 * Read up to maxBytes bytes from the start of a file through ADODB.Stream.
 * The stream is always closed, including when loading or reading fails.
 *
 * @param {string} fp Path of the file to read.
 * @param {number} maxBytes Maximum number of bytes to read.
 * @returns {number[]} The bytes read. Throws if nothing could be read.
 */
function readLeadingBytes(fp, maxBytes) {
    var stream = new ActiveXObject("ADODB.Stream");
    try {
        stream.Type = 1; // adTypeBinary
        stream.Open();
        stream.LoadFromFile(fp);
        return new VBArray(stream.Read(maxBytes)).toArray();
    } finally {
        try {
            stream.Close();
        } catch (ignored) {
            // Close() fails when the stream was never opened; nothing to release then.
        }
    }
}

/**
 * Score how strongly the bytes look like UTF-8: 0.1 per ASCII byte, 1 per
 * well-formed 2-byte sequence, 2 per well-formed 3- or 4-byte sequence.
 * Malformed bytes score nothing and the scan resumes at the next byte.
 *
 * @param {number[]} bytes Byte values (0-255).
 * @returns {number} The accumulated score.
 */
function scoreUtf8(bytes) {
    var score = 0;
    var len = bytes.length;
    var i = 0;
    while (i < len) {
        var lead = bytes[i];
        if (lead < 0x80) {
            score += 0.1; // Lower ASCII weight
            i += 1;
            continue;
        }

        var extra;  // number of continuation bytes the lead byte announces
        var weight; // score granted when the whole sequence is well-formed
        if (lead >= 0xC2 && lead <= 0xDF) {
            extra = 1;
            weight = 1;
        } else if ((lead & 0xF0) === 0xE0) {
            extra = 2;
            weight = 2;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            extra = 3;
            weight = 2;
        } else {
            // Stray continuation byte or a byte that can never start a sequence.
            i += 1;
            continue;
        }

        // The sequence runs past the end of the sampled buffer: stop scanning.
        if (i + extra >= len) break;

        var valid = true;
        for (var k = 1; k <= extra; k++) {
            if ((bytes[i + k] & 0xC0) !== 0x80) {
                valid = false;
                break;
            }
        }
        // ED A0..BF encodes UTF-16 surrogates, which are not valid in UTF-8.
        if (valid && lead === 0xED && bytes[i + 1] >= 0xA0) valid = false;

        if (valid) {
            score += weight;
            i += extra + 1;
        } else {
            i += 1;
        }
    }
    return score;
}

/**
 * Score how strongly the bytes look like GBK (GB2312 core area), Big5 or
 * Shift-JIS. ASCII bytes are skipped one at a time; any other byte is taken
 * as a lead byte and consumed together with the byte that follows it.
 *
 * @param {number[]} bytes Byte values (0-255).
 * @returns {{gbk: number, big5: number, sjis: number}} Score per encoding.
 */
function scoreDoubleByte(bytes) {
    var scores = { gbk: 0, big5: 0, sjis: 0 };
    var i = 0;
    while (i + 1 < bytes.length) {
        var b1 = bytes[i];
        if (b1 < 0x80) {
            i += 1;
            continue;
        }
        var b2 = bytes[i + 1];

        // GBK precise detection (only GB2312 core area)
        if (
            (b1 >= 0xB0 && b1 <= 0xF7 && b2 >= 0xA1 && b2 <= 0xFE) ||
            (b1 >= 0xA1 && b1 <= 0xA9 && b2 >= 0xA1 && b2 <= 0xA3)
        ) {
            scores.gbk += 2;
        }
        // Big5 precise detection (high-frequency character area)
        else if (
            (b1 >= 0xA4 && b1 <= 0xC6 && b2 >= 0x40 && b2 <= 0x7E) ||
            (b1 >= 0xC9 && b1 <= 0xF9 && b2 >= 0x40 && b2 <= 0x6B)
        ) {
            scores.big5 += 2;
        }
        // Shift-JIS precise detection (including common Japanese characters)
        else if (
            (b1 >= 0x81 && b1 <= 0x9F && b2 >= 0x40 && b2 <= 0x7E) ||
            (b1 >= 0xE0 && b1 <= 0xFC && b2 >= 0x40 && b2 <= 0x7E)
        ) {
            scores.sjis += 3; // Shift-JIS weight
        }
        i += 2;
    }
    return scores;
}
/**
 * Translate the system ANSI code page (the ACP registry value) into an
 * encoding name.
 *
 * @returns {string} "gb2312" for 936, "big5" for 950, "Shift_JIS" for 932;
 *     "UTF-8" for any other code page or when the registry cannot be read.
 */
function getSystemANSIEncoding() {
    // Code page number -> encoding name
    var namesByCodePage = {
        "936": "gb2312",   // Simplified Chinese
        "950": "big5",     // Traditional Chinese
        "932": "Shift_JIS" // Japanese
    };
    try {
        var shell = new ActiveXObject("WScript.Shell");
        var codePage = shell.RegRead("HKLM\\SYSTEM\\CurrentControlSet\\Control\\Nls\\CodePage\\ACP");
        var name = namesByCodePage[codePage];
        return name ? name : "UTF-8";
    } catch (e) {
        return "UTF-8";
    }
}