option explicit
' Text Encode
' (c) 2016 qiuqi
' This is a script for Directory Opus.
' See http://www.gpsoft.com.au/DScripts/redirect.asp?page=scripts for development information.
'
'
'
'Here's the file "Hello" in various encodings:
'48 65 6C 6C 6F
'This is the traditional ANSI encoding.
'48 00 65 00 6C 00 6C 00 6F 00
'This is the Unicode (little-endian) encoding with no BOM.
'FF FE 48 00 65 00 6C 00 6C 00 6F 00
'This is the Unicode (little-endian) encoding with BOM. The BOM (FF FE) serves two purposes: First, it tags the file as a Unicode document, and second, the order in which the two bytes appear indicate that the file is little-endian.
'00 48 00 65 00 6C 00 6C 00 6F
'This is the Unicode (big-endian) encoding with no BOM. Notepad does not support this encoding.
'FE FF 00 48 00 65 00 6C 00 6C 00 6F
'This is the Unicode (big-endian) encoding with BOM. Notice that this BOM is in the opposite order from the little-endian BOM.
'EF BB BF 48 65 6C 6C 6F
'This is UTF-8 encoding. The first three bytes are the UTF-8 encoding of the BOM.
'2B 2F 76 38 2D 48 65 6C 6C 6F
'This is UTF-7 encoding. The first five bytes are the UTF-7 encoding of the BOM. Notepad doesn't support this encoding.
'Without a BOM, the encoding can only be guessed from the content.
'
'
' Entry point invoked by Directory Opus to initialize the script.
' Fills in the script metadata on initData, stores the default value of the
' FileExtension configuration option, and registers the "GetTextEncode"
' column whose values are produced by OnGetTextEncode.
Function OnInit(initData)
    Dim defaultExtensions, encodeColumn

    ' Default ";"-separated list of "*.ext" patterns for which the column is evaluated.
    defaultExtensions = "*.asp;*.aspx;*.asax;*.ascx;*.ashx;*.bat;*.cmd;*.c;*.h;*.cs;*.cpp;*.hpp;*.cc;*.c++;" & _
        "*.css;*.ini;*.inf;*.pas;*.dproj;*.bdsproj;*.dpr;*.dpk;*.dfm;*.fmx;*.nfm;*.xfm;*.lfm;*.e;*.groovy;" & _
        "*.html;*.htm;*.shtml;*.hta;*.jsl;*.java;*.jav;*.jsp;*.js;*.jse;*.json;*.pl;*.pm;" & _
        "*.plex;*.php;*.php4;*.phtml;*.ps1;*.py;*.pyw;*.rb;*.rbx;*.erb;*.resx;*.sql;*.tcl;*.txt;" & _
        "*.vbs;*.frm;*.vb;*.bas;*.xml;*.dtd;*.xhtml;*.xsl;*.xslt;*.wpl;*.xsd;*.xs"

    ' Script metadata and configuration defaults.
    With initData
        .name = "Text Encode"
        .version = "1.0"
        .copyright = "(c) 2016 qiuqi"
        .desc = ""
        .default_enable = True
        .min_version = "12.0"
        .config.FileExtension = defaultExtensions
    End With

    ' The url property is only assigned when running Opus 12.0.8 or later.
    If DOpus.version.AtLeast("12.0.8") Then
        initData.url = "https://resource.dopus.com/viewforum.php?f=35"
    End If

    ' Register the column; sorting is left enabled (nosort is not set).
    Set encodeColumn = initData.AddColumn
    With encodeColumn
        .name = "GetTextEncode"
        .method = "OnGetTextEncode"
        .label = "Encode"
        .justify = "left"
        .autogroup = False
    End With
End Function
' Implements the GetTextEncode column.
' scriptColData: column data object supplied by Directory Opus; its .item is
'                the file being evaluated and its .value receives the text.
' The column is left blank for folders, for files without an extension and
' for files whose extension is not listed in the FileExtension option.
' Otherwise the value is "File Is Empty" (size 0), "Unknow" (fewer than 4
' bytes) or the result of GetEncoding.
Function OnGetTextEncode(scriptColData)
    Dim item, ext, wantedPattern, pattern, isListed

    Set item = scriptColData.item
    If item.is_dir Then Exit Function

    ' Files without an extension can never match a "*.ext" pattern.
    ext = LCase(item.ext)
    If Len(ext) = 0 Then Exit Function

    ' Compare against each ";"-separated entry exactly. A plain substring
    ' search over the whole list would let ".x" match "*.xml", ".ht" match
    ' "*.html", and an empty extension ("*") match everything.
    wantedPattern = "*" & ext
    isListed = False
    For Each pattern In Split(LCase(Script.config.FileExtension), ";")
        If Trim(pattern) = wantedPattern Then
            isListed = True
            Exit For
        End If
    Next
    If Not isListed Then Exit Function

    If item.size = 0 Then
        scriptColData.value = "File Is Empty"
    ElseIf item.size < 4 Then
        ' GetEncoding needs the first four bytes of the file.
        scriptColData.value = "Unknow"
    Else
        scriptColData.value = GetEncoding(item)
    End If
End Function
' Guesses the text encoding of a file from its first four bytes.
' FileName: the file to inspect, passed straight to DOpus.FSUtil.OpenFile.
' Returns one of: "UTF-8 BOM", "UTF32LE BOM", "UTF32BE BOM", "UTF16LE BOM",
' "UTF16BE BOM", "UTF16LE", "UTF16BE", "ANSI", or "Unknow" when the file
' could not be opened or fewer than four bytes could be read.
Function GetEncoding(FileName)
    Dim file, header, bytesRead
    Dim b0, b1, b2, b3

    bytesRead = 0
    Set header = DOpus.Create.Blob(0, 0, 0, 0)
    Set file = DOpus.FSUtil.OpenFile(FileName)
    If file.Error = 0 Then
        file.Seek 0, "b"
        bytesRead = file.Read(header, 4)
        file.Close
    End If

    If bytesRead < 4 Then
        GetEncoding = "Unknow"
        Exit Function
    End If

    b0 = header(0)
    b1 = header(1)
    b2 = header(2)
    b3 = header(3)

    ' Byte order marks are checked first because they are unambiguous.
    ' The UTF-32 marks must be tested before the UTF-16 ones: the UTF-32
    ' little-endian mark (FF FE 00 00) starts with the UTF-16 little-endian
    ' mark (FF FE).
    If b0 = &HEF And b1 = &HBB And b2 = &HBF Then
        GetEncoding = "UTF-8 BOM"
    ElseIf b0 = &HFF And b1 = &HFE And b2 = &H00 And b3 = &H00 Then
        GetEncoding = "UTF32LE BOM"
    ElseIf b0 = &H00 And b1 = &H00 And b2 = &HFE And b3 = &HFF Then
        GetEncoding = "UTF32BE BOM"
    ElseIf b0 = &HFF And b1 = &HFE Then
        ' No constraint on the following character, so BOM-tagged non-Latin
        ' text (high byte <> 0) is recognized as well.
        GetEncoding = "UTF16LE BOM"
    ElseIf b0 = &HFE And b1 = &HFF Then
        GetEncoding = "UTF16BE BOM"
    ' Without a BOM the encoding can only be guessed: two leading characters
    ' whose high bytes are zero look like UTF-16 text in the Latin range.
    ElseIf b0 <> &H00 And b1 = &H00 And b2 <> &H00 And b3 = &H00 Then
        GetEncoding = "UTF16LE"
    ElseIf b0 = &H00 And b1 <> &H00 And b2 = &H00 And b3 <> &H00 Then
        GetEncoding = "UTF16BE"
    Else
        GetEncoding = "ANSI"
    End If
End Function
Thanks for posting this! o) It inspired me to add an "Encoding" column to an existing column set of mine (FileInfo).
I converted parts of your code to JScript and used your core logic to detect the file encoding. Very nice! o)
You are credited in the thread and code of course! o)
Smart-ass notes to your snippet:
- col.label = "Encode" -> wouldn't "Encoding" be better here (also applies to the script's internal name)?
- Files.Seek 0, "b" -> not necessary?
- initData.url -> this is meant to be set to the URL of this thread ("Column text file encoding"), if I'm not wrong
Not sure if returning "ANSI" is correct, I chose to keep it at "?" if there is no match, since you can't be sure if it's ASCII or ANSI with codepaging.
Not an expert on file encoding though, took me some years to finally understand some of the basics. o)
cya,
tbone
[quote="tbone"]Thanks for posting this! o) It inspired me to add an "Encoding" column to an existing column set of mine (FileInfo).
I converted parts of your code to JScript and used your core logic to detect the file encoding. Very nice! o)
You are credited in the thread and code of course! o)
Smart-ass notes to your snippet:
- col.label = "Encode" -> wouldn't "Encoding" be better here (also applies to the scripts internal name)?
- Files.Seek 0, "b" -> not necessary?
- initData.url -> this is meant to be set to "Column text file encoding if I'm not wrong
Not sure if returning "ANSI" is correct, I chose to keep it at "?" if there is no match, since you can't be sure if it's ASCII or ANSI with codepaging.
Not an expert on file encoding though, took me some years to finally understand some of the basics. o)
cya,
tbone[/quote]
Hair is convenient for everyone, you are inspired me very happy.
You say "Hair"? Huhm.. o)
translation error