Skip to content

Commit da6895d

Browse files
mrdoob and claude authored
USDAParser: Improve text parsing robustness. (#32747)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 43166fc commit da6895d

File tree

1 file changed

+303
-6
lines changed

1 file changed

+303
-6
lines changed

examples/jsm/loaders/usd/USDAParser.js

Lines changed: 303 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@ class USDAParser {
22

33
parseText( text ) {
44

5+
// Preprocess: strip comments and normalize multiline values
6+
text = this._preprocess( text );
7+
58
const root = {};
69

710
const lines = text.split( '\n' );
@@ -15,10 +18,18 @@ class USDAParser {
1518

1619
if ( line.includes( '=' ) ) {
1720

18-
const assignment = line.split( '=' );
21+
// Find the first '=' that's not inside quotes
22+
const eqIdx = this._findAssignmentOperator( line );
23+
24+
if ( eqIdx === - 1 ) {
25+
26+
string = line.trim();
27+
continue;
28+
29+
}
1930

20-
const lhs = assignment[ 0 ].trim();
21-
const rhs = assignment[ 1 ].trim();
31+
const lhs = line.slice( 0, eqIdx ).trim();
32+
const rhs = line.slice( eqIdx + 1 ).trim();
2233

2334
if ( rhs.endsWith( '{' ) ) {
2435

@@ -104,6 +115,245 @@ class USDAParser {
104115

105116
}
106117

118+
_preprocess( text ) {
119+
120+
// Remove block comments /* ... */
121+
text = this._stripBlockComments( text );
122+
123+
// Remove line comments # ... (but preserve #usda header)
124+
// Only remove # comments that aren't at the start of a line or after whitespace
125+
const lines = text.split( '\n' );
126+
const processed = [];
127+
128+
let inMultilineValue = false;
129+
let bracketDepth = 0;
130+
let parenDepth = 0;
131+
let accumulated = '';
132+
133+
for ( let i = 0; i < lines.length; i ++ ) {
134+
135+
let line = lines[ i ];
136+
137+
// Strip inline comments (but not inside strings)
138+
line = this._stripInlineComment( line );
139+
140+
// Track bracket/paren depth for multiline values
141+
const trimmed = line.trim();
142+
143+
if ( inMultilineValue ) {
144+
145+
// Continue accumulating multiline value
146+
accumulated += ' ' + trimmed;
147+
148+
// Update depths
149+
for ( const ch of trimmed ) {
150+
151+
if ( ch === '[' ) bracketDepth ++;
152+
else if ( ch === ']' ) bracketDepth --;
153+
else if ( ch === '(' && bracketDepth > 0 ) parenDepth ++;
154+
else if ( ch === ')' && bracketDepth > 0 ) parenDepth --;
155+
156+
}
157+
158+
// Check if multiline value is complete
159+
if ( bracketDepth === 0 && parenDepth === 0 ) {
160+
161+
processed.push( accumulated );
162+
accumulated = '';
163+
inMultilineValue = false;
164+
165+
}
166+
167+
} else {
168+
169+
// Check if this line starts a multiline array value
170+
// Look for patterns like "attr = [" or "attr = @path@[" without closing ]
171+
if ( trimmed.includes( '=' ) ) {
172+
173+
const eqIdx = this._findAssignmentOperator( trimmed );
174+
175+
if ( eqIdx !== - 1 ) {
176+
177+
const rhs = trimmed.slice( eqIdx + 1 ).trim();
178+
179+
// Count brackets in the value part
180+
let openBrackets = 0;
181+
let closeBrackets = 0;
182+
183+
for ( const ch of rhs ) {
184+
185+
if ( ch === '[' ) openBrackets ++;
186+
else if ( ch === ']' ) closeBrackets ++;
187+
188+
}
189+
190+
if ( openBrackets > closeBrackets ) {
191+
192+
// Multiline array detected
193+
inMultilineValue = true;
194+
bracketDepth = openBrackets - closeBrackets;
195+
parenDepth = 0;
196+
accumulated = trimmed;
197+
continue;
198+
199+
}
200+
201+
}
202+
203+
}
204+
205+
processed.push( trimmed );
206+
207+
}
208+
209+
}
210+
211+
return processed.join( '\n' );
212+
213+
}
214+
215+
_stripBlockComments( text ) {
216+
217+
// Iteratively remove /* ... */ comments without regex backtracking
218+
let result = '';
219+
let i = 0;
220+
221+
while ( i < text.length ) {
222+
223+
// Check for block comment start
224+
if ( text[ i ] === '/' && i + 1 < text.length && text[ i + 1 ] === '*' ) {
225+
226+
// Find the closing */
227+
let j = i + 2;
228+
229+
while ( j < text.length ) {
230+
231+
if ( text[ j ] === '*' && j + 1 < text.length && text[ j + 1 ] === '/' ) {
232+
233+
// Found closing, skip past it
234+
j += 2;
235+
break;
236+
237+
}
238+
239+
j ++;
240+
241+
}
242+
243+
// Move past the comment (or to end if unclosed)
244+
i = j;
245+
246+
} else {
247+
248+
result += text[ i ];
249+
i ++;
250+
251+
}
252+
253+
}
254+
255+
return result;
256+
257+
}
258+
259+
_stripInlineComment( line ) {
260+
261+
// Don't strip if line starts with #usda
262+
if ( line.trim().startsWith( '#usda' ) ) return line;
263+
264+
// Find # that's not inside a string
265+
let inString = false;
266+
let stringChar = null;
267+
let escaped = false;
268+
269+
for ( let i = 0; i < line.length; i ++ ) {
270+
271+
const ch = line[ i ];
272+
273+
if ( escaped ) {
274+
275+
escaped = false;
276+
continue;
277+
278+
}
279+
280+
if ( ch === '\\' ) {
281+
282+
escaped = true;
283+
continue;
284+
285+
}
286+
287+
if ( ! inString && ( ch === '"' || ch === '\'' ) ) {
288+
289+
inString = true;
290+
stringChar = ch;
291+
292+
} else if ( inString && ch === stringChar ) {
293+
294+
inString = false;
295+
stringChar = null;
296+
297+
} else if ( ! inString && ch === '#' ) {
298+
299+
// Found comment start outside of string
300+
return line.slice( 0, i ).trimEnd();
301+
302+
}
303+
304+
}
305+
306+
return line;
307+
308+
}
309+
310+
_findAssignmentOperator( line ) {
311+
312+
// Find the first '=' that's not inside quotes
313+
let inString = false;
314+
let stringChar = null;
315+
let escaped = false;
316+
317+
for ( let i = 0; i < line.length; i ++ ) {
318+
319+
const ch = line[ i ];
320+
321+
if ( escaped ) {
322+
323+
escaped = false;
324+
continue;
325+
326+
}
327+
328+
if ( ch === '\\' ) {
329+
330+
escaped = true;
331+
continue;
332+
333+
}
334+
335+
if ( ! inString && ( ch === '"' || ch === '\'' ) ) {
336+
337+
inString = true;
338+
stringChar = ch;
339+
340+
} else if ( inString && ch === stringChar ) {
341+
342+
inString = false;
343+
stringChar = null;
344+
345+
} else if ( ! inString && ch === '=' ) {
346+
347+
return i;
348+
349+
}
350+
351+
}
352+
353+
return - 1;
354+
355+
}
356+
107357
/**
108358
* Parse USDA text and return raw spec data in specsByPath format.
109359
* Used by USDComposer for unified scene composition.
@@ -438,19 +688,66 @@ class USDAParser {
438688
// String/token types
439689
if ( valueType === 'string' || valueType === 'token' ) {
440690

441-
return str.replace( /"/g, '' );
691+
return this._parseString( str );
442692

443693
}
444694

445695
// Asset path
446696
if ( valueType === 'asset' ) {
447697

448-
return str.replace( /@/g, '' );
698+
return str.replace( /@/g, '' ).replace( /"/g, '' );
449699

450700
}
451701

452702
// Default: return as string with quotes removed
453-
return str.replace( /"/g, '' );
703+
return this._parseString( str );
704+
705+
}
706+
707+
_parseString( str ) {
708+
709+
// Remove surrounding quotes
710+
if ( ( str.startsWith( '"' ) && str.endsWith( '"' ) ) ||
711+
( str.startsWith( '\'' ) && str.endsWith( '\'' ) ) ) {
712+
713+
str = str.slice( 1, - 1 );
714+
715+
}
716+
717+
// Handle escape sequences
718+
let result = '';
719+
let i = 0;
720+
721+
while ( i < str.length ) {
722+
723+
if ( str[ i ] === '\\' && i + 1 < str.length ) {
724+
725+
const next = str[ i + 1 ];
726+
727+
switch ( next ) {
728+
729+
case 'n': result += '\n'; break;
730+
case 't': result += '\t'; break;
731+
case 'r': result += '\r'; break;
732+
case '\\': result += '\\'; break;
733+
case '"': result += '"'; break;
734+
case '\'': result += '\''; break;
735+
default: result += next; break;
736+
737+
}
738+
739+
i += 2;
740+
741+
} else {
742+
743+
result += str[ i ];
744+
i ++;
745+
746+
}
747+
748+
}
749+
750+
return result;
454751

455752
}
456753

0 commit comments

Comments
 (0)