/* * tclexpat.c -- * * A Tcl interface to James Clark's expat XML parser * * Copyright (c) 1998 Steve Ball, Zveno Pty Ltd * * with modifications * by Jochen Loewer(loewerj@hotmail.com) (July 1999) * by ericm@scriptics.com, 1999.6.25 * by Rolf Ade (rolf@pointsman.de) (2000, 2001) * * * Zveno Pty Ltd makes this software and associated documentation * available free of charge for any purpose. You may make copies * of the software but you must include all of this notice on any copy. * * Zveno Pty Ltd does not warrant that this software is error free * or fit for any purpose. Zveno Pty Ltd disclaims any liability for * all claims, expenses, losses, damages and costs any user may incur * as a result of using, copying or modifying the software. * * Jochen Loewer does not warrant that this software is error free * or fit for any purpose. Jochen Loewer disclaims any liability for * all claims, expenses, losses, damages and costs any user may incur * as a result of using, copying or modifying the software. * */ /*---------------------------------------------------------------------------- | Includes | \---------------------------------------------------------------------------*/ #include #include #include #include #include #ifdef _MSC_VER #include #endif #ifdef _POSIX_SOURCE #include #endif /* Used internal als status, like TCL_OK, TCL_ERROR etc. As a consequent, application specific error codes must be at least greater than 5 */ #define ERROR_IN_EXTREFHANDLER 5 #define READ_SIZE (1024*8) #ifndef O_BINARY #ifdef _O_BINARY #define O_BINARY _O_BINARY #else #define O_BINARY 0 #endif #endif /*---------------------------------------------------------------------------- | Macros | \---------------------------------------------------------------------------*/ #define DBG(x) #define SetResult(interp,str) \ (Tcl_SetStringObj (Tcl_GetObjResult (interp), (str), -1)) #define SetIntResult(interp,i) \ (Tcl_SetIntObj (Tcl_GetObjResult (interp), (i) )) #define AppendResult(interp,str) \ (Tcl_AppendToObj (Tcl_GetObjResult (interp), (str), -1)) #define CheckArgs(min,max,n,msg) \ if ((objc < min) || (objc >max)) { \ Tcl_WrongNumArgs(interp, n, objv, msg); \ return TCL_ERROR; \ } #define CheckDefaultTclHandlerSet \ if (!activeTclHandlerSet) { \ activeTclHandlerSet = CreateTclHandlerSet("default");\ tmpTclHandlerSet = expat->firstTclHandlerSet; \ expat->firstTclHandlerSet = activeTclHandlerSet; \ activeTclHandlerSet->nextHandlerSet = tmpTclHandlerSet; \ } /*---------------------------------------------------------------------------- | typedefs | \---------------------------------------------------------------------------*/ typedef enum { EXPAT_INPUT_STRING, EXPAT_INPUT_CHANNEL, EXPAT_INPUT_FILENAME } TclExpat_InputType; /*---------------------------------------------------------------------------- | local globals | \---------------------------------------------------------------------------*/ static int uniqueCounter = 0; /* Counter to generate unique command names */ TDomThreaded(static Tcl_Mutex counterMutex;) /* Protect the counter (zv) */ /*---------------------------------------------------------------------------- | Prototypes for procedures defined later in this file: | \---------------------------------------------------------------------------*/ int TclExpatObjCmd _ANSI_ARGS_((ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])); static int TclExpatInstanceCmd _ANSI_ARGS_((ClientData dummy, Tcl_Interp *interp, int objc, struct Tcl_Obj *CONST objv[])); static void TclExpatDeleteCmd _ANSI_ARGS_((ClientData clientData)); static Tcl_Obj* FindUniqueCmdName _ANSI_ARGS_((Tcl_Interp *interp)); static int TclExpatCheckWhiteData _ANSI_ARGS_((char *pc, int len)); static int TclExpatInitializeParser _ANSI_ARGS_((Tcl_Interp *interp, TclGenExpatInfo *expat, int resetOptions )); static void TclExpatFreeParser _ANSI_ARGS_((TclGenExpatInfo *expat)); static int TclExpatParse _ANSI_ARGS_((Tcl_Interp *interp, TclGenExpatInfo *expat, char *data, int len, TclExpat_InputType type)); static int TclExpatConfigure _ANSI_ARGS_((Tcl_Interp *interp, TclGenExpatInfo *expat, int objc, Tcl_Obj *CONST objv[])); static int TclExpatCget _ANSI_ARGS_((Tcl_Interp *interp, TclGenExpatInfo *expat, int objc, Tcl_Obj *CONST objv[])); static int TclExpatGet _ANSI_ARGS_((Tcl_Interp *interp, TclGenExpatInfo *expat, int objc, Tcl_Obj *CONST objv[])); static void TclExpatDispatchPCDATA _ANSI_ARGS_((TclGenExpatInfo *expat)); static void TclGenExpatElementStartHandler _ANSI_ARGS_((void *userdata, const XML_Char *name, const XML_Char **atts)); static void TclGenExpatElementEndHandler _ANSI_ARGS_((void *userData, const XML_Char *name)); static void TclGenExpatCharacterDataHandler _ANSI_ARGS_((void *userData, const XML_Char *s, int len)); static void TclGenExpatProcessingInstructionHandler _ANSI_ARGS_(( void *userData, const XML_Char *target, const XML_Char *data)); static int TclGenExpatExternalEntityRefHandler _ANSI_ARGS_(( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)); static void TclGenExpatDefaultHandler _ANSI_ARGS_ ((void *userData, const XML_Char *s, int len)); static void TclGenExpatNotationDeclHandler _ANSI_ARGS_ ((void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId)); static int TclGenExpatUnknownEncodingHandler _ANSI_ARGS_ (( void *encodingHandlerData, const XML_Char *name, XML_Encoding *info)); static void TclGenExpatStartNamespaceDeclHandler _ANSI_ARGS_((void *userdata, const XML_Char *prefix, const XML_Char *uri)); static void TclGenExpatEndNamespaceDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *prefix)); /* Following added by ericm@scriptics, 1999.6.25 */ /* Prototype definition for the TclExpat comment handler */ static void TclGenExpatCommentHandler _ANSI_ARGS_ ((void *userData, const XML_Char *data)); /* Prototype for TclExpat Not Standalone Handler */ static int TclGenExpatNotStandaloneHandler _ANSI_ARGS_ ((void *userData)); /* Prototype for TclExpat {Start|End}CdataSectionHandler */ static void TclGenExpatStartCdataSectionHandler _ANSI_ARGS_((void *userData)); static void TclGenExpatEndCdataSectionHandler _ANSI_ARGS_((void *userData)); /* Added by ericm@scriptics.com, 1999.09.13 */ /* Prototype for TclExpat (Element|Attlist) Declaration Handlers */ static void TclGenExpatElementDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *name, XML_Content *model)); static void TclGenExpatAttlistDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *elname, const XML_Char *name, const XML_Char *type, const XML_Char *dflt, int isrequired)); /* Prototypes for the TclExpat Doctype Decl handlers */ static void TclGenExpatStartDoctypeDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *doctypeName, const XML_Char *sysid, const XML_Char *pubid, int has_internal_subset)); static void TclGenExpatEndDoctypeDeclHandler _ANSI_ARGS_((void *userData)); static void TclGenExpatXmlDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *version, const XML_Char *encoding, int standalone)); static void TclGenExpatEntityDeclHandler _ANSI_ARGS_((void *userData, const XML_Char *entityname, int is_param, const XML_Char *value, int length, CONST XML_Char *base, CONST XML_Char *systemId, CONST XML_Char *publicId, CONST XML_Char *notationName)); /* *---------------------------------------------------------------------------- * * CreateTclHandlerSet -- * * Malloc's and initializes a tclHandlerSet. * * Results: * None. * * Side effects: * Mallocs memory for the structure and the 'name' field, sets all * handler scripts to NULL and inits some other fields. * *---------------------------------------------------------------------------- */ static TclHandlerSet* CreateTclHandlerSet (name) char *name; { TclHandlerSet *handlerSet; handlerSet = (TclHandlerSet*) MALLOC (sizeof (TclHandlerSet)); \ handlerSet->name = tdomstrdup (name); handlerSet->ignoreWhiteCDATAs = 0; handlerSet->status = TCL_OK; handlerSet->continueCount = 0; handlerSet->nextHandlerSet = NULL; handlerSet->elementstartcommand = NULL; handlerSet->elementendcommand = NULL; handlerSet->startnsdeclcommand = NULL; handlerSet->endnsdeclcommand = NULL; handlerSet->datacommand = NULL; handlerSet->picommand = NULL; handlerSet->defaultcommand = NULL; handlerSet->notationcommand = NULL; handlerSet->externalentitycommand = NULL; handlerSet->unknownencodingcommand = NULL; handlerSet->commentCommand = NULL; handlerSet->notStandaloneCommand = NULL; handlerSet->startCdataSectionCommand = NULL; handlerSet->endCdataSectionCommand = NULL; handlerSet->elementDeclCommand = NULL; handlerSet->attlistDeclCommand = NULL; handlerSet->startDoctypeDeclCommand = NULL; handlerSet->endDoctypeDeclCommand = NULL; handlerSet->xmlDeclCommand = NULL; handlerSet->entityDeclCommand = NULL; return handlerSet; } /* *---------------------------------------------------------------------------- * * CHandlerSetCreate -- * * Initializes a CHandlerSet. * * Results: * None. * * Side effects: * Mallocs memory for the 'name' of the structure, sets all * handler functions to NULL and inits some other fields. * *---------------------------------------------------------------------------- */ CHandlerSet* CHandlerSetCreate (name) char *name; { CHandlerSet *handlerSet; handlerSet = (CHandlerSet *) MALLOC (sizeof (CHandlerSet)); handlerSet->name = tdomstrdup (name); handlerSet->ignoreWhiteCDATAs = 0; handlerSet->nextHandlerSet = NULL; handlerSet->userData = NULL; handlerSet->resetProc = NULL; handlerSet->freeProc = NULL; handlerSet->initParseProc = NULL; handlerSet->parserResetProc = NULL; handlerSet->elementstartcommand = NULL; handlerSet->elementendcommand = NULL; handlerSet->startnsdeclcommand = NULL; handlerSet->endnsdeclcommand = NULL; handlerSet->datacommand = NULL; handlerSet->picommand = NULL; handlerSet->defaultcommand = NULL; handlerSet->notationcommand = NULL; handlerSet->externalentitycommand = NULL; handlerSet->unknownencodingcommand = NULL; handlerSet->commentCommand = NULL; handlerSet->notStandaloneCommand = NULL; handlerSet->startCdataSectionCommand = NULL; handlerSet->endCdataSectionCommand = NULL; handlerSet->elementDeclCommand = NULL; handlerSet->attlistDeclCommand = NULL; handlerSet->startDoctypeDeclCommand = NULL; handlerSet->endDoctypeDeclCommand = NULL; handlerSet->xmlDeclCommand = NULL; handlerSet->entityDeclCommand = NULL; return handlerSet; } /* *---------------------------------------------------------------------------- * * TclExpatObjCmd -- * * Creation command for expat class. * * Results: * The name of the newly created parser instance. * * Side effects: * This creates an expat parser. * *---------------------------------------------------------------------------- */ int TclExpatObjCmd(dummy, interp, objc, objv) ClientData dummy; Tcl_Interp *interp; int objc; Tcl_Obj *CONST objv[]; { TclGenExpatInfo *genexpat; int ns_mode = 0; char *nsoption; /* * Create the data structures for this parser. */ if (!(genexpat = (TclGenExpatInfo *) MALLOC(sizeof(TclGenExpatInfo)))) { FREE( (char*) genexpat); Tcl_SetResult(interp, "unable to create parser", NULL); return TCL_ERROR; } memset (genexpat, 0, sizeof (TclGenExpatInfo)); genexpat->interp = interp; genexpat->final = 1; /* * Find unique command name */ if (objc < 2) { genexpat->name = FindUniqueCmdName(interp); } else { genexpat->name = objv[1]; if (*(Tcl_GetString(genexpat->name)) != '-') { Tcl_IncrRefCount(genexpat->name); objv++; objc--; } else { genexpat->name = FindUniqueCmdName(interp); } } genexpat->paramentityparsing = XML_PARAM_ENTITY_PARSING_NEVER; if (objc > 1) { nsoption = Tcl_GetString(objv[1]); if (strcmp(nsoption,"-namespace")==0) { ns_mode = 1; objv++; objc--; } } genexpat->ns_mode = ns_mode; genexpat->nsSeparator = ':'; if (TclExpatInitializeParser(interp, genexpat, 0) != TCL_OK) { FREE( (char*) genexpat); return TCL_ERROR; } /* * Register a Tcl command for this parser instance. */ Tcl_CreateObjCommand(interp, Tcl_GetString(genexpat->name), TclExpatInstanceCmd, (ClientData) genexpat, TclExpatDeleteCmd); /* * Handle configuration options */ if (objc > 1) { if (TclExpatConfigure(interp, genexpat, objc - 1, objv + 1) != TCL_OK) { return TCL_ERROR; } } Tcl_SetObjResult(interp, genexpat->name); return TCL_OK; } /* *---------------------------------------------------------------------------- * * FindUniqueCmdName -- * * Generate new command name in caller's namespace. * * Results: * Returns newly allocated Tcl object containing name. * * Side effects: * Allocates Tcl object. * *---------------------------------------------------------------------------- */ static Tcl_Obj * FindUniqueCmdName(interp) Tcl_Interp *interp; { Tcl_Obj *name; Tcl_CmdInfo info; char s[20]; name = Tcl_NewStringObj("", 0); Tcl_IncrRefCount(name); do { TDomThreaded(Tcl_MutexLock(&counterMutex);) sprintf(s, "xmlparser%d", uniqueCounter++); TDomThreaded(Tcl_MutexUnlock(&counterMutex);) Tcl_SetStringObj(name, s, -1); } while (Tcl_GetCommandInfo(interp, Tcl_GetString(name), &info)); return name; } /* *---------------------------------------------------------------------------- * * TclExpatInitializeParser -- * * Create or re-initializes (if it already exists) the expat * parser and initialise (some of) the TclExpatInfo structure. * * Note that callback commands are not affected by this routine, * to allow a reset to leave these intact. * * Results: * A flag, signaling success or error. * * Side effects: * Creates or reset an expat parser. * Modifies TclExpatInfo fields. * *---------------------------------------------------------------------------- */ static int TclExpatInitializeParser(interp, expat, resetOptions) Tcl_Interp *interp; TclGenExpatInfo *expat; int resetOptions; { CHandlerSet *activeCHandlerSet; ExpatElemContent *eContent, *eContentSave; if (expat->parser) { XML_ParserReset (expat->parser, NULL); activeCHandlerSet = expat->firstCHandlerSet; while (activeCHandlerSet) { if (activeCHandlerSet->resetProc) { activeCHandlerSet->resetProc (expat->interp, activeCHandlerSet->userData); } activeCHandlerSet = activeCHandlerSet->nextHandlerSet; } } else { if (expat->ns_mode) { if (!(expat->parser = XML_ParserCreate_MM(NULL, MEM_SUITE, &expat->nsSeparator))) { Tcl_SetResult(interp, "unable to create expat parserNs", NULL); return TCL_ERROR; } } else { if (!(expat->parser = XML_ParserCreate_MM(NULL, MEM_SUITE, NULL))) { Tcl_SetResult(interp, "unable to create expat parser", NULL); return TCL_ERROR; } } } expat->status = TCL_OK; if (expat->result) { Tcl_DecrRefCount (expat->result); expat->result = NULL; } if (expat->cdata) { Tcl_DecrRefCount (expat->cdata); } expat->cdata = NULL; eContent = expat->eContents; while (eContent) { XML_FreeContentModel (expat->parser, eContent->content); eContentSave = eContent; eContent = eContent->next; FREE((char *) eContentSave); } expat->eContents = NULL; expat->finished = 0; expat->parsingStarted = 0; if (resetOptions) { expat->final = 1; expat->needWSCheck = 0; expat->noexpand = 0; expat->useForeignDTD = 0; expat->paramentityparsing = XML_PARAM_ENTITY_PARSING_NEVER; if (expat->baseURI) { Tcl_DecrRefCount (expat->baseURI); expat->baseURI = NULL; } } if (expat->baseURI) { XML_SetBase (expat->parser, Tcl_GetString (expat->baseURI)); Tcl_DecrRefCount (expat->baseURI); expat->baseURI = NULL; } /* * Set handlers for the parser to routines in this module. */ XML_SetElementHandler(expat->parser, (XML_StartElementHandler) TclGenExpatElementStartHandler, (XML_EndElementHandler) TclGenExpatElementEndHandler); XML_SetNamespaceDeclHandler(expat->parser, (XML_StartNamespaceDeclHandler) TclGenExpatStartNamespaceDeclHandler, (XML_EndNamespaceDeclHandler) TclGenExpatEndNamespaceDeclHandler); XML_SetCharacterDataHandler(expat->parser, (XML_CharacterDataHandler) TclGenExpatCharacterDataHandler); XML_SetProcessingInstructionHandler(expat->parser, (XML_ProcessingInstructionHandler) TclGenExpatProcessingInstructionHandler); XML_SetDefaultHandlerExpand(expat->parser, (XML_DefaultHandler) TclGenExpatDefaultHandler); XML_SetNotationDeclHandler(expat->parser, (XML_NotationDeclHandler) TclGenExpatNotationDeclHandler); XML_SetExternalEntityRefHandler(expat->parser, (XML_ExternalEntityRefHandler) TclGenExpatExternalEntityRefHandler); XML_SetUnknownEncodingHandler(expat->parser, (XML_UnknownEncodingHandler) TclGenExpatUnknownEncodingHandler, (void *) expat); XML_SetCommentHandler(expat->parser, TclGenExpatCommentHandler); XML_SetNotStandaloneHandler(expat->parser, TclGenExpatNotStandaloneHandler); XML_SetCdataSectionHandler(expat->parser, TclGenExpatStartCdataSectionHandler, TclGenExpatEndCdataSectionHandler); XML_SetElementDeclHandler(expat->parser, TclGenExpatElementDeclHandler); XML_SetAttlistDeclHandler(expat->parser, TclGenExpatAttlistDeclHandler); XML_SetDoctypeDeclHandler(expat->parser, TclGenExpatStartDoctypeDeclHandler, TclGenExpatEndDoctypeDeclHandler); XML_SetXmlDeclHandler (expat->parser, TclGenExpatXmlDeclHandler); XML_SetEntityDeclHandler (expat->parser, TclGenExpatEntityDeclHandler); if (expat->noexpand) { XML_SetDefaultHandlerExpand(expat->parser, NULL); XML_SetDefaultHandler(expat->parser, (XML_DefaultHandler) TclGenExpatDefaultHandler); } else { XML_SetDefaultHandler(expat->parser, NULL); XML_SetDefaultHandlerExpand(expat->parser, (XML_DefaultHandler) TclGenExpatDefaultHandler); } XML_SetUserData(expat->parser, (void *) expat); return TCL_OK; } /* *---------------------------------------------------------------------------- * * TclExpatFreeParser -- * * Destroy the expat parser structure and frees the stored content models, * if there one. * * Results: * None. * * Side effects: * Frees any memory allocated for the XML parser and (if still present) * the stored content models. * *---------------------------------------------------------------------------- */ static void TclExpatFreeParser(expat) TclGenExpatInfo *expat; { ExpatElemContent *eContent, *eContentSave; eContent = expat->eContents; while (eContent) { XML_FreeContentModel (expat->parser, eContent->content); eContentSave = eContent; eContent = eContent->next; FREE((char *) eContentSave); } expat->eContents = NULL; XML_ParserFree(expat->parser); expat->parser = NULL; } /* *---------------------------------------------------------------------------- * * TclExpatInstanceCmd -- * * Implements instance command for expat class objects. * * Results: * Depends on the method. * * Side effects: * Depends on the method. * *---------------------------------------------------------------------------- */ static int TclExpatInstanceCmd (clientData, interp, objc, objv) ClientData clientData; Tcl_Interp *interp; int objc; Tcl_Obj *CONST objv[]; { TclGenExpatInfo *expat = (TclGenExpatInfo *) clientData; char *data; int len = 0, optionIndex, result = TCL_OK; static CONST84 char *options[] = { "configure", "cget", "free", "get", "parse", "parsechannel", "parsefile", "reset", NULL }; enum options { EXPAT_CONFIGURE, EXPAT_CGET, EXPAT_FREE, EXPAT_GET, EXPAT_PARSE, EXPAT_PARSECHANNEL, EXPAT_PARSEFILE, EXPAT_RESET }; if (objc < 2) { Tcl_SetResult (interp, "wrong # args: should be \"parserCmd method ?arg ...?\"", TCL_STATIC); return TCL_ERROR; } if (Tcl_GetIndexFromObj(interp, objv[1], options, "option", 0, &optionIndex) != TCL_OK) { return TCL_ERROR; } switch ((enum options) optionIndex) { case EXPAT_CONFIGURE: if (objc < 3) { Tcl_SetResult (interp, "wrong # args: should be \"parserCmd configure