@@ -2,6 +2,9 @@ class USDAParser {
22
33 parseText ( text ) {
44
5+ // Preprocess: strip comments and normalize multiline values
6+ text = this . _preprocess ( text ) ;
7+
58 const root = { } ;
69
710 const lines = text . split ( '\n' ) ;
@@ -15,10 +18,18 @@ class USDAParser {
1518
1619 if ( line . includes ( '=' ) ) {
1720
18- const assignment = line . split ( '=' ) ;
21+ // Find the first '=' that's not inside quotes
22+ const eqIdx = this . _findAssignmentOperator ( line ) ;
23+
24+ if ( eqIdx === - 1 ) {
25+
26+ string = line . trim ( ) ;
27+ continue ;
28+
29+ }
1930
20- const lhs = assignment [ 0 ] . trim ( ) ;
21- const rhs = assignment [ 1 ] . trim ( ) ;
31+ const lhs = line . slice ( 0 , eqIdx ) . trim ( ) ;
32+ const rhs = line . slice ( eqIdx + 1 ) . trim ( ) ;
2233
2334 if ( rhs . endsWith ( '{' ) ) {
2435
@@ -104,6 +115,245 @@ class USDAParser {
104115
105116 }
106117
118+ _preprocess ( text ) {
119+
120+ // Remove block comments /* ... */
121+ text = this . _stripBlockComments ( text ) ;
122+
123+ // Remove line comments # ... (but preserve #usda header)
124+ // Only remove # comments that aren't at the start of a line or after whitespace
125+ const lines = text . split ( '\n' ) ;
126+ const processed = [ ] ;
127+
128+ let inMultilineValue = false ;
129+ let bracketDepth = 0 ;
130+ let parenDepth = 0 ;
131+ let accumulated = '' ;
132+
133+ for ( let i = 0 ; i < lines . length ; i ++ ) {
134+
135+ let line = lines [ i ] ;
136+
137+ // Strip inline comments (but not inside strings)
138+ line = this . _stripInlineComment ( line ) ;
139+
140+ // Track bracket/paren depth for multiline values
141+ const trimmed = line . trim ( ) ;
142+
143+ if ( inMultilineValue ) {
144+
145+ // Continue accumulating multiline value
146+ accumulated += ' ' + trimmed ;
147+
148+ // Update depths
149+ for ( const ch of trimmed ) {
150+
151+ if ( ch === '[' ) bracketDepth ++ ;
152+ else if ( ch === ']' ) bracketDepth -- ;
153+ else if ( ch === '(' && bracketDepth > 0 ) parenDepth ++ ;
154+ else if ( ch === ')' && bracketDepth > 0 ) parenDepth -- ;
155+
156+ }
157+
158+ // Check if multiline value is complete
159+ if ( bracketDepth === 0 && parenDepth === 0 ) {
160+
161+ processed . push ( accumulated ) ;
162+ accumulated = '' ;
163+ inMultilineValue = false ;
164+
165+ }
166+
167+ } else {
168+
169+ // Check if this line starts a multiline array value
170+ // Look for patterns like "attr = [" or "attr = @path@[" without closing ]
171+ if ( trimmed . includes ( '=' ) ) {
172+
173+ const eqIdx = this . _findAssignmentOperator ( trimmed ) ;
174+
175+ if ( eqIdx !== - 1 ) {
176+
177+ const rhs = trimmed . slice ( eqIdx + 1 ) . trim ( ) ;
178+
179+ // Count brackets in the value part
180+ let openBrackets = 0 ;
181+ let closeBrackets = 0 ;
182+
183+ for ( const ch of rhs ) {
184+
185+ if ( ch === '[' ) openBrackets ++ ;
186+ else if ( ch === ']' ) closeBrackets ++ ;
187+
188+ }
189+
190+ if ( openBrackets > closeBrackets ) {
191+
192+ // Multiline array detected
193+ inMultilineValue = true ;
194+ bracketDepth = openBrackets - closeBrackets ;
195+ parenDepth = 0 ;
196+ accumulated = trimmed ;
197+ continue ;
198+
199+ }
200+
201+ }
202+
203+ }
204+
205+ processed . push ( trimmed ) ;
206+
207+ }
208+
209+ }
210+
211+ return processed . join ( '\n' ) ;
212+
213+ }
214+
215+ _stripBlockComments ( text ) {
216+
217+ // Iteratively remove /* ... */ comments without regex backtracking
218+ let result = '' ;
219+ let i = 0 ;
220+
221+ while ( i < text . length ) {
222+
223+ // Check for block comment start
224+ if ( text [ i ] === '/' && i + 1 < text . length && text [ i + 1 ] === '*' ) {
225+
226+ // Find the closing */
227+ let j = i + 2 ;
228+
229+ while ( j < text . length ) {
230+
231+ if ( text [ j ] === '*' && j + 1 < text . length && text [ j + 1 ] === '/' ) {
232+
233+ // Found closing, skip past it
234+ j += 2 ;
235+ break ;
236+
237+ }
238+
239+ j ++ ;
240+
241+ }
242+
243+ // Move past the comment (or to end if unclosed)
244+ i = j ;
245+
246+ } else {
247+
248+ result += text [ i ] ;
249+ i ++ ;
250+
251+ }
252+
253+ }
254+
255+ return result ;
256+
257+ }
258+
259+ _stripInlineComment ( line ) {
260+
261+ // Don't strip if line starts with #usda
262+ if ( line . trim ( ) . startsWith ( '#usda' ) ) return line ;
263+
264+ // Find # that's not inside a string
265+ let inString = false ;
266+ let stringChar = null ;
267+ let escaped = false ;
268+
269+ for ( let i = 0 ; i < line . length ; i ++ ) {
270+
271+ const ch = line [ i ] ;
272+
273+ if ( escaped ) {
274+
275+ escaped = false ;
276+ continue ;
277+
278+ }
279+
280+ if ( ch === '\\' ) {
281+
282+ escaped = true ;
283+ continue ;
284+
285+ }
286+
287+ if ( ! inString && ( ch === '"' || ch === '\'' ) ) {
288+
289+ inString = true ;
290+ stringChar = ch ;
291+
292+ } else if ( inString && ch === stringChar ) {
293+
294+ inString = false ;
295+ stringChar = null ;
296+
297+ } else if ( ! inString && ch === '#' ) {
298+
299+ // Found comment start outside of string
300+ return line . slice ( 0 , i ) . trimEnd ( ) ;
301+
302+ }
303+
304+ }
305+
306+ return line ;
307+
308+ }
309+
310+ _findAssignmentOperator ( line ) {
311+
312+ // Find the first '=' that's not inside quotes
313+ let inString = false ;
314+ let stringChar = null ;
315+ let escaped = false ;
316+
317+ for ( let i = 0 ; i < line . length ; i ++ ) {
318+
319+ const ch = line [ i ] ;
320+
321+ if ( escaped ) {
322+
323+ escaped = false ;
324+ continue ;
325+
326+ }
327+
328+ if ( ch === '\\' ) {
329+
330+ escaped = true ;
331+ continue ;
332+
333+ }
334+
335+ if ( ! inString && ( ch === '"' || ch === '\'' ) ) {
336+
337+ inString = true ;
338+ stringChar = ch ;
339+
340+ } else if ( inString && ch === stringChar ) {
341+
342+ inString = false ;
343+ stringChar = null ;
344+
345+ } else if ( ! inString && ch === '=' ) {
346+
347+ return i ;
348+
349+ }
350+
351+ }
352+
353+ return - 1 ;
354+
355+ }
356+
107357 /**
108358 * Parse USDA text and return raw spec data in specsByPath format.
109359 * Used by USDComposer for unified scene composition.
@@ -438,19 +688,66 @@ class USDAParser {
438688 // String/token types
439689 if ( valueType === 'string' || valueType === 'token' ) {
440690
441- return str . replace ( / " / g , '' ) ;
691+ return this . _parseString ( str ) ;
442692
443693 }
444694
445695 // Asset path
446696 if ( valueType === 'asset' ) {
447697
448- return str . replace ( / @ / g, '' ) ;
698+ return str . replace ( / @ / g, '' ) . replace ( / " / g , '' ) ;
449699
450700 }
451701
452702 // Default: return as string with quotes removed
453- return str . replace ( / " / g, '' ) ;
703+ return this . _parseString ( str ) ;
704+
705+ }
706+
707+ _parseString ( str ) {
708+
709+ // Remove surrounding quotes
710+ if ( ( str . startsWith ( '"' ) && str . endsWith ( '"' ) ) ||
711+ ( str . startsWith ( '\'' ) && str . endsWith ( '\'' ) ) ) {
712+
713+ str = str . slice ( 1 , - 1 ) ;
714+
715+ }
716+
717+ // Handle escape sequences
718+ let result = '' ;
719+ let i = 0 ;
720+
721+ while ( i < str . length ) {
722+
723+ if ( str [ i ] === '\\' && i + 1 < str . length ) {
724+
725+ const next = str [ i + 1 ] ;
726+
727+ switch ( next ) {
728+
729+ case 'n' : result += '\n' ; break ;
730+ case 't' : result += '\t' ; break ;
731+ case 'r' : result += '\r' ; break ;
732+ case '\\' : result += '\\' ; break ;
733+ case '"' : result += '"' ; break ;
734+ case '\'' : result += '\'' ; break ;
735+ default : result += next ; break ;
736+
737+ }
738+
739+ i += 2 ;
740+
741+ } else {
742+
743+ result += str [ i ] ;
744+ i ++ ;
745+
746+ }
747+
748+ }
749+
750+ return result ;
454751
455752 }
456753
0 commit comments