Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ The following switches have different behavior in this version of `sqlcmd` compa
- To provide the value of the host name in the server certificate when using strict encryption, pass the host name with `-F`. Example: `-Ns -F myhost.domain.com`
- More information about client/server encryption negotiation can be found at <https://docs.microsoft.com/openspecs/windows_protocols/ms-tds/60f56408-0188-4cd5-8b90-25c6f2423868>
- `-u` The generated Unicode output file will have the UTF16 Little-Endian Byte-order mark (BOM) written to it.
- `-f` Specifies the code page for input and output files. See [Code Page Support](#code-page-support) below for details and examples.
- Some behaviors that were kept to maintain compatibility with `OSQL` may be changed, such as alignment of column headers for some data types.
- All commands must fit on one line, even `EXIT`. Interactive mode will not check for open parentheses or quotes for commands and prompt for successive lines. The ODBC sqlcmd allows the query run by `EXIT(query)` to span multiple lines.
- `-i` doesn't handle a comma `,` in a file name correctly unless the file name argument is triple quoted. For example:
Expand Down Expand Up @@ -237,6 +238,68 @@ To see a list of available styles along with colored syntax samples, use this co
:list color
```

### Code Page Support

The `-f` flag specifies the code page for reading input files and writing output. This is useful when working with SQL scripts saved in legacy encodings or when output needs to be in a specific encoding.

#### Format

```
-f codepage # Set both input and output to the same codepage
-f i:codepage # Set input codepage only
-f o:codepage # Set output codepage only
-f i:codepage,o:codepage # Set input and output to different codepages
-f o:codepage,i:codepage # Same as above (order doesn't matter)
```

#### Common Code Pages

| Code Page | Name | Description |
|-----------|------|-------------|
| 65001 | UTF-8 | Unicode (UTF-8) - default for most modern systems |
| 1200 | UTF-16LE | Unicode (UTF-16 Little-Endian) |
| 1201 | UTF-16BE | Unicode (UTF-16 Big-Endian) |
| 1252 | Windows-1252 | Western European (Windows) |
| 932 | Shift_JIS | Japanese |
| 936 | GBK | Chinese Simplified |
| 949 | EUC-KR | Korean |
| 950 | Big5 | Chinese Traditional |
| 437 | CP437 | OEM United States (DOS) |

#### Examples

**Run a script saved in Windows-1252 encoding:**
```bash
sqlcmd -S myserver -i legacy_script.sql -f 1252
```

**Read UTF-16 input file and write UTF-8 output:**
```bash
sqlcmd -S myserver -i unicode_script.sql -o results.txt -f i:1200,o:65001
```

**Process a Japanese Shift-JIS encoded script:**
```bash
sqlcmd -S myserver -i japanese_data.sql -f 932
```

**Write output in Windows-1252 for legacy applications:**
```bash
sqlcmd -S myserver -Q "SELECT * FROM Products" -o report.txt -f o:1252
```

**List all supported code pages:**
```bash
sqlcmd --list-codepages
```

#### Notes

- When no `-f` flag is specified, sqlcmd checks each input file for a UTF-8, UTF-16LE, or UTF-16BE byte order mark (BOM) and switches to the matching decoder. If no BOM is present, UTF-8 is assumed.
- UTF-8 input files with BOM are handled automatically.
- On Windows, additional code pages installed on the system are available via the Windows API, even if they are not listed by `--list-codepages`.
- Use `--list-codepages` to see the built-in code pages with their names and descriptions.

### Packages

#### sqlcmd executable
Expand Down
29 changes: 29 additions & 0 deletions cmd/sqlcmd/sqlcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ type SQLCmdArguments struct {
ChangePassword string
ChangePasswordAndExit string
TraceFile string
CodePage string
// codePageSettings stores the parsed CodePageSettings after validation.
// This avoids parsing CodePage twice (in Validate and run).
codePageSettings *sqlcmd.CodePageSettings
ListCodePages bool
// Keep Help at the end of the list
Help bool
}
Expand Down Expand Up @@ -171,6 +176,12 @@ func (a *SQLCmdArguments) Validate(c *cobra.Command) (err error) {
err = rangeParameterError("-t", fmt.Sprint(a.QueryTimeout), 0, 65534, true)
case a.ServerCertificate != "" && !encryptConnectionAllowsTLS(a.EncryptConnection):
err = localizer.Errorf("The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict).")
case a.CodePage != "":
if codePageSettings, parseErr := sqlcmd.ParseCodePage(a.CodePage); parseErr != nil {
err = localizer.Errorf(`'-f %s': %v`, a.CodePage, parseErr)
} else {
a.codePageSettings = codePageSettings
}
}
}
if err != nil {
Expand Down Expand Up @@ -239,6 +250,17 @@ func Execute(version string) {
listLocalServers()
os.Exit(0)
}
// List supported codepages
if args.ListCodePages {
fmt.Println(localizer.Sprintf("Supported Code Pages:"))
fmt.Println()
fmt.Printf("%-8s %-20s %s\n", "Code", "Name", "Description")
fmt.Printf("%-8s %-20s %s\n", "----", "----", "-----------")
for _, cp := range sqlcmd.SupportedCodePages() {
fmt.Printf("%-8d %-20s %s\n", cp.CodePage, cp.Name, cp.Description)
}
os.Exit(0)
}
if len(argss) > 0 {
fmt.Printf("%s'%s': Unknown command. Enter '--help' for command help.", sqlcmdErrorPrefix, argss[0])
os.Exit(1)
Expand Down Expand Up @@ -479,6 +501,8 @@ func setFlags(rootCmd *cobra.Command, args *SQLCmdArguments) {
rootCmd.Flags().BoolVarP(&args.EnableColumnEncryption, "enable-column-encryption", "g", false, localizer.Sprintf("Enable column encryption"))
rootCmd.Flags().StringVarP(&args.ChangePassword, "change-password", "z", "", localizer.Sprintf("New password"))
rootCmd.Flags().StringVarP(&args.ChangePasswordAndExit, "change-password-exit", "Z", "", localizer.Sprintf("New password and exit"))
rootCmd.Flags().StringVarP(&args.CodePage, "code-page", "f", "", localizer.Sprintf("Specifies the code page for input/output. Use 65001 for UTF-8. Format: codepage | i:codepage[,o:codepage] | o:codepage[,i:codepage]"))
rootCmd.Flags().BoolVar(&args.ListCodePages, "list-codepages", false, localizer.Sprintf("List supported code pages and exit"))
}

func setScriptVariable(v string) string {
Expand Down Expand Up @@ -813,6 +837,11 @@ func run(vars *sqlcmd.Variables, args *SQLCmdArguments) (int, error) {
defer s.StopCloseHandler()
s.UnicodeOutputFile = args.UnicodeOutputFile

// Apply codepage settings (already parsed and validated in Validate)
if args.codePageSettings != nil {
s.CodePage = args.codePageSettings
}

if args.DisableCmd != nil {
s.Cmd.DisableSysCommands(args.errorOnBlockedCmd())
}
Expand Down
21 changes: 21 additions & 0 deletions cmd/sqlcmd/sqlcmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ func TestValidCommandLineToArgsConversion(t *testing.T) {
{[]string{"-N", "true", "-J", "/path/to/cert2.pem"}, func(args SQLCmdArguments) bool {
return args.EncryptConnection == "true" && args.ServerCertificate == "/path/to/cert2.pem"
}},
// Codepage flag tests
{[]string{"-f", "65001"}, func(args SQLCmdArguments) bool {
return args.CodePage == "65001"
}},
{[]string{"-f", "i:1252,o:65001"}, func(args SQLCmdArguments) bool {
return args.CodePage == "i:1252,o:65001"
}},
{[]string{"-f", "o:65001,i:1252"}, func(args SQLCmdArguments) bool {
return args.CodePage == "o:65001,i:1252"
}},
{[]string{"--code-page", "1252"}, func(args SQLCmdArguments) bool {
return args.CodePage == "1252"
}},
{[]string{"--list-codepages"}, func(args SQLCmdArguments) bool {
return args.ListCodePages
}},
}

for _, test := range commands {
Expand Down Expand Up @@ -178,6 +194,11 @@ func TestInvalidCommandLine(t *testing.T) {
{[]string{"-N", "optional", "-J", "/path/to/cert.pem"}, "The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict)."},
{[]string{"-N", "disable", "-J", "/path/to/cert.pem"}, "The -J parameter requires encryption to be enabled (-N true, -N mandatory, or -N strict)."},
{[]string{"-N", "strict", "-F", "myserver.domain.com", "-J", "/path/to/cert.pem"}, "The -F and the -J options are mutually exclusive."},
// Codepage validation tests
{[]string{"-f", "invalid"}, `'-f invalid': invalid codepage: invalid`},
{[]string{"-f", "99999"}, `'-f 99999': unsupported codepage 99999`},
{[]string{"-f", "i:invalid"}, `'-f i:invalid': invalid input codepage: i:invalid`},
{[]string{"-f", "x:1252"}, `'-f x:1252': invalid codepage: x:1252`},
}

for _, test := range commands {
Expand Down
214 changes: 214 additions & 0 deletions pkg/sqlcmd/codepage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

package sqlcmd

import (
"sort"
"strconv"
"strings"

"github.com/microsoft/go-sqlcmd/internal/localizer"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)

// codepageEntry is one row of codepageRegistry: the encoding used to
// transcode text for a codepage plus the human-readable metadata reported by
// SupportedCodePages.
//
// The registry initializes entries with positional (unkeyed) literals, so the
// field order here must not change without updating every registry row.
type codepageEntry struct {
	// encoding converts to and from the codepage. It is nil for UTF-8, which
	// needs no transcoding because Go strings are already UTF-8.
	encoding encoding.Encoding // nil for UTF-8 (Go's native encoding)
	// name is the short identifier of the codepage (for example "UTF-8").
	name string
	// description is the longer, human-readable label of the codepage.
	description string
}

// codepageRegistry is the single source of truth for all supported codepages
// that work cross-platform. Both GetEncoding and SupportedCodePages use this
// registry. On Windows, additional codepages installed on the system are also
// available via the Windows API fallback in GetEncoding.
//
// Keys are codepage numbers; each value is written as a positional
// codepageEntry literal in the order {encoding, name, description}.
var codepageRegistry = map[int]codepageEntry{
	// Unicode. UTF-8 carries a nil encoding because no transcoding is needed.
	65001: {nil, "UTF-8", "Unicode (UTF-8)"},
	1200: {unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), "UTF-16LE", "Unicode (UTF-16 Little-Endian)"},
	1201: {unicode.UTF16(unicode.BigEndian, unicode.UseBOM), "UTF-16BE", "Unicode (UTF-16 Big-Endian)"},

	// OEM/DOS codepages
	437: {charmap.CodePage437, "CP437", "OEM United States"},
	850: {charmap.CodePage850, "CP850", "OEM Multilingual Latin 1"},
	852: {charmap.CodePage852, "CP852", "OEM Latin 2"},
	855: {charmap.CodePage855, "CP855", "OEM Cyrillic"},
	858: {charmap.CodePage858, "CP858", "OEM Multilingual Latin 1 + Euro"},
	860: {charmap.CodePage860, "CP860", "OEM Portuguese"},
	862: {charmap.CodePage862, "CP862", "OEM Hebrew"},
	863: {charmap.CodePage863, "CP863", "OEM Canadian French"},
	865: {charmap.CodePage865, "CP865", "OEM Nordic"},
	866: {charmap.CodePage866, "CP866", "OEM Russian"},

	// Windows codepages
	874: {charmap.Windows874, "Windows-874", "Thai"},
	1250: {charmap.Windows1250, "Windows-1250", "Central European"},
	1251: {charmap.Windows1251, "Windows-1251", "Cyrillic"},
	1252: {charmap.Windows1252, "Windows-1252", "Western European"},
	1253: {charmap.Windows1253, "Windows-1253", "Greek"},
	1254: {charmap.Windows1254, "Windows-1254", "Turkish"},
	1255: {charmap.Windows1255, "Windows-1255", "Hebrew"},
	1256: {charmap.Windows1256, "Windows-1256", "Arabic"},
	1257: {charmap.Windows1257, "Windows-1257", "Baltic"},
	1258: {charmap.Windows1258, "Windows-1258", "Vietnamese"},

	// ISO-8859 codepages (registered as 28590 + part number; parts 11 and 12
	// have no entry in this table)
	28591: {charmap.ISO8859_1, "ISO-8859-1", "Latin 1 (Western European)"},
	28592: {charmap.ISO8859_2, "ISO-8859-2", "Latin 2 (Central European)"},
	28593: {charmap.ISO8859_3, "ISO-8859-3", "Latin 3 (South European)"},
	28594: {charmap.ISO8859_4, "ISO-8859-4", "Latin 4 (North European)"},
	28595: {charmap.ISO8859_5, "ISO-8859-5", "Cyrillic"},
	28596: {charmap.ISO8859_6, "ISO-8859-6", "Arabic"},
	28597: {charmap.ISO8859_7, "ISO-8859-7", "Greek"},
	28598: {charmap.ISO8859_8, "ISO-8859-8", "Hebrew"},
	28599: {charmap.ISO8859_9, "ISO-8859-9", "Turkish"},
	28600: {charmap.ISO8859_10, "ISO-8859-10", "Nordic"},
	28603: {charmap.ISO8859_13, "ISO-8859-13", "Baltic"},
	28604: {charmap.ISO8859_14, "ISO-8859-14", "Celtic"},
	28605: {charmap.ISO8859_15, "ISO-8859-15", "Latin 9 (Western European with Euro)"},
	28606: {charmap.ISO8859_16, "ISO-8859-16", "Latin 10 (South-Eastern European)"},

	// Cyrillic
	20866: {charmap.KOI8R, "KOI8-R", "Russian"},
	21866: {charmap.KOI8U, "KOI8-U", "Ukrainian"},

	// Macintosh
	10000: {charmap.Macintosh, "Macintosh", "Mac Roman"},
	10007: {charmap.MacintoshCyrillic, "x-mac-cyrillic", "Mac Cyrillic"},

	// EBCDIC
	37: {charmap.CodePage037, "IBM037", "EBCDIC US-Canada"},
	1047: {charmap.CodePage1047, "IBM1047", "EBCDIC Latin 1/Open System"},
	1140: {charmap.CodePage1140, "IBM01140", "EBCDIC US-Canada with Euro"},

	// Japanese
	// NOTE(review): 50220, 50221 and 50222 all share the single
	// japanese.ISO2022JP encoding, so the kana-handling differences named in
	// their descriptions are not distinguished here — confirm this is intended.
	932: {japanese.ShiftJIS, "Shift_JIS", "Japanese (Shift-JIS)"},
	20932: {japanese.EUCJP, "EUC-JP", "Japanese (EUC)"},
	50220: {japanese.ISO2022JP, "ISO-2022-JP", "Japanese (JIS)"},
	50221: {japanese.ISO2022JP, "csISO2022JP", "Japanese (JIS-Allow 1 byte Kana)"},
	50222: {japanese.ISO2022JP, "ISO-2022-JP", "Japanese (JIS-Allow 1 byte Kana SO/SI)"},

	// Korean (949 and 51949 both use the korean.EUCKR encoding)
	949: {korean.EUCKR, "EUC-KR", "Korean"},
	51949: {korean.EUCKR, "EUC-KR", "Korean (EUC)"},

	// Simplified Chinese
	936: {simplifiedchinese.GBK, "GBK", "Chinese Simplified (GBK)"},
	54936: {simplifiedchinese.GB18030, "GB18030", "Chinese Simplified (GB18030)"},
	52936: {simplifiedchinese.HZGB2312, "HZ-GB-2312", "Chinese Simplified (HZ)"},

	// Traditional Chinese
	950: {traditionalchinese.Big5, "Big5", "Chinese Traditional (Big5)"},
}

// CodePageSettings holds the input and output codepage settings produced by
// ParseCodePage from the -f argument.
//
// A field value of 0 means that direction was not specified; ParseCodePage
// only validates fields that are non-zero.
type CodePageSettings struct {
	// InputCodePage is the codepage number for input, or 0 if unset.
	InputCodePage int
	// OutputCodePage is the codepage number for output, or 0 if unset.
	OutputCodePage int
}

// ParseCodePage parses the -f codepage argument.
//
// Format: codepage | i:codepage[,o:codepage] | o:codepage[,i:codepage]
//
// Segments are separated by commas and surrounding whitespace is trimmed.
// The "i:" and "o:" prefixes are matched case-insensitively. A bare number
// sets both the input and the output codepage. Empty segments are skipped,
// and when the same direction is given more than once the last one wins.
//
// An empty arg returns (nil, nil), meaning no codepage handling was
// requested. Otherwise the result has at least one non-zero codepage, and
// every non-zero codepage has been accepted by GetEncoding.
//
// An error is returned when a segment is not a positive integer, when no
// codepage could be parsed from a non-empty arg, or when GetEncoding rejects
// a parsed codepage.
func ParseCodePage(arg string) (*CodePageSettings, error) {
	if arg == "" {
		return nil, nil
	}

	settings := &CodePageSettings{}

	for _, part := range strings.Split(arg, ",") {
		part = strings.TrimSpace(part)
		if part == "" {
			continue
		}

		// Lowercase once so both prefixes can be matched case-insensitively.
		lower := strings.ToLower(part)

		// A codepage must be a positive number. Zero is reserved in
		// CodePageSettings to mean "not set", so accepting an explicit 0
		// (for example "i:0,o:1252") would silently drop that direction.
		switch {
		case strings.HasPrefix(lower, "i:"):
			// Input codepage
			cp, err := strconv.Atoi(lower[len("i:"):])
			if err != nil || cp <= 0 {
				return nil, localizer.Errorf("invalid input codepage: %s", part)
			}
			settings.InputCodePage = cp
		case strings.HasPrefix(lower, "o:"):
			// Output codepage
			cp, err := strconv.Atoi(lower[len("o:"):])
			if err != nil || cp <= 0 {
				return nil, localizer.Errorf("invalid output codepage: %s", part)
			}
			settings.OutputCodePage = cp
		default:
			// Both input and output
			cp, err := strconv.Atoi(part)
			if err != nil || cp <= 0 {
				return nil, localizer.Errorf("invalid codepage: %s", part)
			}
			settings.InputCodePage = cp
			settings.OutputCodePage = cp
		}
	}

	// If a non-empty argument was provided but no codepage was parsed
	// (for example "," or " "), treat this as an error rather than silently
	// disabling codepage handling.
	if settings.InputCodePage == 0 && settings.OutputCodePage == 0 {
		return nil, localizer.Errorf("invalid codepage: %s", arg)
	}

	// Validate that each requested codepage resolves to a known encoding.
	if settings.InputCodePage != 0 {
		if _, err := GetEncoding(settings.InputCodePage); err != nil {
			return nil, err
		}
	}
	if settings.OutputCodePage != 0 {
		if _, err := GetEncoding(settings.OutputCodePage); err != nil {
			return nil, err
		}
	}

	return settings, nil
}

// GetEncoding resolves a Windows codepage number to its encoding.
//
// Codepages present in the built-in registry are answered directly; the
// returned encoding is nil for UTF-8 (65001) because Go text is already
// UTF-8 and needs no transcoding. Any other codepage is delegated to
// getSystemCodePageEncoding, the OS-specific fallback (Windows API on
// Windows, error on other platforms), and its result is returned as-is.
func GetEncoding(codepage int) (encoding.Encoding, error) {
	if known, found := codepageRegistry[codepage]; found {
		return known.encoding, nil
	}
	// Not a built-in codepage: defer to system-provided support.
	return getSystemCodePageEncoding(codepage)
}

// CodePageInfo describes a supported codepage. SupportedCodePages returns one
// value per entry of the built-in registry.
type CodePageInfo struct {
	// CodePage is the numeric codepage identifier (for example 65001).
	CodePage int
	// Name is the short name of the codepage (for example "UTF-8").
	Name string
	// Description is the human-readable description of the codepage.
	Description string
}

// SupportedCodePages lists every codepage in the built-in registry together
// with its name and description, ordered by ascending codepage number so the
// output is stable despite Go's randomized map iteration order.
func SupportedCodePages() []CodePageInfo {
	// Gather and order the registry keys first.
	ids := make([]int, 0, len(codepageRegistry))
	for id := range codepageRegistry {
		ids = append(ids, id)
	}
	sort.Ints(ids)

	// Then emit one info record per codepage in that order.
	infos := make([]CodePageInfo, 0, len(ids))
	for _, id := range ids {
		meta := codepageRegistry[id]
		infos = append(infos, CodePageInfo{
			CodePage:    id,
			Name:        meta.name,
			Description: meta.description,
		})
	}
	return infos
}
Loading
Loading