diff --git a/README.md b/README.md index ac8e5489..d735a800 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ export default { htm: ['HTMLLexer'], html: ['HTMLLexer'], + php: ['PhpLexer'], mjs: ['JavascriptLexer'], js: ['JavascriptLexer'], // if you're writing jsx inside .js files, change this to JsxLexer @@ -261,7 +262,7 @@ export default { The `lexers` option let you configure which Lexer to use for which extension. Here is the default: Note the presence of a `default` which will catch any extension that is not listed. -There are 4 lexers available: `HandlebarsLexer`, `HTMLLexer`, `JavascriptLexer` and +There are 5 lexers available: `HandlebarsLexer`, `HTMLLexer`, `PhpLexer`, `JavascriptLexer` and `JsxLexer`. Each has configurations of its own. Typescript is supported via `JavascriptLexer` and `JsxLexer`. If you need to change the defaults, you can do it like so: @@ -372,6 +373,18 @@ Typescript is supported via Javascript and Jsx lexers. If you are using Javascri } ``` +#### Php + +```js +{ + // PhpLexer default config (php) + php: [{ + lexer: 'PhpLexer', + functions: ['t'], // Array of functions to match + }] +} +``` + #### Custom lexers You can provide function instead of string as a custom lexer. 
/**
 * Configuration object form for selecting the built-in PhpLexer in the
 * `lexers` option, as an alternative to the bare 'PhpLexer' string.
 */
export interface PhpLexerConfig {
  /** Discriminant: identifies this configuration as targeting PhpLexer. */
  lexer: 'PhpLexer'
  /**
   * Names of the translation functions whose calls are scanned for keys.
   * PhpLexer falls back to ['t'] when this is omitted.
   */
  functions?: string[]
}
/**
 * Lexer that pulls translation keys out of PHP source text.
 *
 * It does not parse PHP. It scans the text with a regular expression for calls
 * to the configured translation functions whose first argument is a plain
 * single- or double-quoted string literal, and reports that literal as a key.
 */
export default class PhpLexer extends BaseLexer {
  /**
   * @param {object} [options] - Lexer options; passed through to BaseLexer unchanged.
   * @param {string[]} [options.functions] - Names of the translation functions
   *   to look for. Defaults to ['t'].
   */
  constructor(options = {}) {
    super(options)
    this.functions = options.functions || ['t']
  }

  /**
   * Collect the keys passed as first argument to the configured functions.
   *
   * Calls whose first argument is followed by the PHP concatenation operator
   * (`.`) are skipped, since the key cannot be known statically. Empty keys
   * are skipped as well.
   *
   * @param {string} content - PHP source text to scan.
   * @param {string} [filename] - Accepted for interface compatibility; not used here.
   * @returns {{ key: string }[]} One entry per extracted key, in source order.
   */
  extract(content, filename) {
    // With no function names the alternation would be empty and any
    // parenthesised string literal would look like a translation call.
    if (this.functions.length === 0) return []

    // Function names are literal text, not patterns: neutralise regex
    // metacharacters so names containing e.g. `.` or `$` match themselves only.
    const functionNames = this.functions
      .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('|')

    const functionPattern = new RegExp(
      // The name must not be the tail of a longer identifier, otherwise the
      // default `t` would also match `print(`, `count(`, `get_text(`, ...
      // Non-identifier prefixes such as `@`, `->`, `::` or `$` are still allowed.
      `(?<![\\w\\x80-\\uffff])` +
        `(?:${functionNames})` +
        // Opening parenthesis and optional whitespace before the argument
        `\\(\\s*` +
        // The string literal. A backslash always consumes the character after
        // it, so `\\` followed by the closing quote ends the string correctly.
        `(?:` +
        `'(?<single>(?:[^'\\\\]|\\\\[\\s\\S])*)'` +
        `|` +
        `"(?<double>(?:[^"\\\\]|\\\\[\\s\\S])*)"` +
        `)` +
        // Reject literals that are concatenated with something else (`.`)
        `(?!\\s*\\.)`,
      'g'
    )

    const keys = []
    // matchAll iterates over a private copy of the regex, so no lastIndex
    // state leaks between calls.
    for (const match of content.matchAll(functionPattern)) {
      const rawKey = match.groups.single ?? match.groups.double

      // Nothing to report for an empty literal
      if (!rawKey) continue

      // Decode escaped quotes and escaped backslashes in a single pass so that
      // a sequence such as `\\\"` becomes `\"` instead of being decoded twice.
      // Other escape sequences (e.g. `\n`) are left as written.
      keys.push({ key: rawKey.replace(/\\(['"\\])/g, '$1') })
    }

    return keys
  }
}
// Specification for PhpLexer#extract: each table row is one PHP snippet, the
// lexer options to use (if any), and the exact list of keys expected back.
describe('php-lexer', () => {
  const extractionCases = [
    {
      title: 'extracts keys from translation function',
      source: 't("first")',
      expected: [{ key: 'first' }],
    },
    {
      title: 'extracts multiple keys',
      source: 't("first"); t("second")',
      expected: [{ key: 'first' }, { key: 'second' }],
    },
    {
      title: 'supports custom function names',
      options: { functions: ['__'] },
      source: '__("custom")',
      expected: [{ key: 'custom' }],
    },
    {
      title: 'extracts key when other parameters are present',
      source: 't("key", $param1, $param2)',
      expected: [{ key: 'key' }],
    },
    {
      title: 'extracts keys from nested functions',
      source: 't("key", ["nested" => t("nested_key")])',
      expected: [{ key: 'key' }, { key: 'nested_key' }],
    },
    {
      title: 'extracts keys with %key or :key syntax',
      source: 't("My %key"); t("My :key")',
      expected: [{ key: 'My %key' }, { key: 'My :key' }],
    },
    {
      title: 'extracts keys with escaped characters',
      // String.raw keeps the backslashes exactly as they appear in the PHP source
      source: String.raw`t("escaped \"double quote\""); t('escaped \'single quote\'')`,
      expected: [
        { key: 'escaped "double quote"' },
        { key: "escaped 'single quote'" },
      ],
    },
  ]

  for (const { title, options, source, expected } of extractionCases) {
    it(title, () => {
      const lexer = options === undefined ? new PhpLexer() : new PhpLexer(options)
      assert.deepEqual(lexer.extract(source), expected)
    })
  }

  it('does not throw on invalid PHP', () => {
    const lexer = new PhpLexer()
    const unterminated = 't("unclosed'
    assert.doesNotThrow(() => lexer.extract(unterminated))
  })
})