| Home | Trees | Indices | Help | 
 | 
|---|
|  | 
  1  """This module encapsulates a document description stored in an XML file. 
  2   
  3  This is mainly used by GNUmed/Archive. 
  4   
  5  @copyright: GPL v2 or later 
  6  """ 
  7  #============================================================ 
  8  __author__ = "Karsten Hilbert <Karsten.Hilbert@gmx.net>" 
  9   
 10  import sys 
 11  import os.path 
 12  import fileinput 
 13  import string 
 14  import logging 
 15   
 16   
 17  _log = logging.getLogger('gm.docs') 
 18  #============================================================ 
 20      # handlers for __getitem__() 
         # maps an item name to a function that is called with the instance to
         # produce the value when it is not found in the instance's data cache
 21      _get_handler = {} 
 22      #-------------------------------------------------------- 
 24          # sanity checks 
             # NOTE(review): ConstructorError is neither defined nor imported in the
             # visible part of this module, and "raise X, msg" is Python 2 only syntax.
 25          if aBaseDir is None: 
 26              raise ConstructorError, "need document path" 
 27          if not os.path.exists(os.path.abspath(aBaseDir)): 
 28              raise ConstructorError, "document path [%s] does not exist" % aBaseDir 
 29          self.__base_dir = aBaseDir 
 30          _log.debug("working from directory [%s]" % self.__base_dir) 
 31   
             # use gmCfg.gmDefCfgFile when no config object was passed in
 32          if aCfg is None: 
 33              _log.warning('no config file specified') 
 34              import gmCfg 
 35              self.__cfg = gmCfg.gmDefCfgFile 
 36          else: 
 37              self.__cfg = aCfg 
 38   
             # name of the config group all options below are read from
 39          self.__group = str(aGroup) 
 40   
             # the "description" option names the XML file, relative to the base directory
 41          tmp = self.__cfg.get(self.__group, "description") 
 42          self.__xml_file = os.path.join(self.__base_dir, tmp) 
 43          if not os.path.exists(self.__xml_file): 
 44              raise ConstructorError, "skipping [%s]: description file [%s] not found" % (self.__base_dir, tmp) 
 45   
             # cache for values read from the XML file; it starts out empty because
             # the eager load below is commented out
 46          self.__data = {} 
 47   
 48  #       if not self.__load_from_xml(): 
 49  #           raise ConstructorError, "XML file [%s] cannot be parsed correctly" % anXmlFile 
 50   
 51          return None 
 52      #-------------------------------------------------------- 
 54          """Load document metadata from XML file. 
 55          """ 
             # Stores 'type', 'comment', 'date', 'reference' and 'description' in
             # self.__data. Returns 1 at the end, or None as soon as one of the first
             # four values cannot be read.
             # NOTE(review): the lookups below go through self.cfg, while the
             # constructor code stores the config object in self.__cfg; self.cfg is
             # not assigned anywhere in the visible code - confirm.
             # string.join() (Python 2 "string" module) glues the returned list of
             # strings together with single spaces.
 56          # document type 
 57          tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "type_tag"), anXMLfile = self.__xml_file) 
 58          if tmp is None: 
 59              _log.error("cannot load document type.") 
 60              return None 
 61          else: 
 62              self.__data['type'] = string.join(tmp) 
 63          # document comment 
 64          tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "comment_tag"), anXMLfile = self.__xml_file) 
 65          if tmp is None: 
 66              _log.error("cannot load document comment") 
 67              return None 
 68          else: 
 69              self.__data['comment'] = string.join(tmp) 
 70          # document reference date 
 71          tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "date_tag"), anXMLfile = self.__xml_file) 
 72          if tmp is None: 
 73              _log.error("cannot load document reference date.") 
 74              return None 
 75          else: 
 76              self.__data['date'] = string.join(tmp) 
 77          # external reference string 
 78          tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "ref_tag"), anXMLfile = self.__xml_file) 
 79          if tmp is None: 
 80              _log.error("cannot load document reference string.") 
 81              return None 
 82          else: 
 83              self.__data['reference'] = string.join(tmp) 
 84          # document description 
             # unlike the values above, a missing description is only logged and
             # does not end the load
 85          tmp = self.__get_from_xml(aTag = self.cfg.get(self.__group, "aux_comment_tag"), anXMLfile = self.__xml_file) 
 86          if tmp is None: 
 87              _log.error("cannot load long document description.") 
 88          else: 
 89              self.__data['description'] = string.join(tmp) 
 90          # list of data files 
 91  #       if not self.__read_img_list(self.__xml_file, aBaseDir, self.__group): 
 92  #           _log.error("Cannot retrieve list of document data files.") 
 93  #           return None 
 94   
             # NOTE(review): when the description could not be read above, the key
             # 'description' was not set here, so the next line raises KeyError
             # unless it was stored in self.__data elsewhere - confirm.
 95          _log.debug("long document description: " + str(self.__data['description'])) 
 96          _log.debug("document reference string: " + str(self.__data['reference'])) 
 97          _log.debug("document reference date: " + str(self.__data['date'])) 
 98          _log.debug("Document comment: " + str(self.__data['comment'])) 
 99          _log.debug("Document type: " + str(self.__data['type'])) 
100   
101          return 1 
102      #-------------------------------------------------------- 
103      # attribute access 
104      #-------------------------------------------------------- 
106          try: 
                 # first choice: the value is already cached in self.__data
107              return self.__data[item] 
108          except KeyError: 
                 # second choice: call the handler registered for this key in the
                 # class-level table, passing the instance explicitly
109              try: 
110                  return xmlDocDesc._get_handler[item](self) 
111              except KeyError: 
                     # NOTE(review): this KeyError may also come from inside the
                     # handler itself, not only from the table lookup.
                     # NOTE(review): sys.exc_info() is passed as an extra positional
                     # argument, which logging treats as a %-format argument although
                     # the message has no placeholder left - confirm this logs as intended.
112                  _log.exception('[%s] neither cached in self.__data nor get handler available' % item, sys.exc_info()) 
113                  return None 
114      #-------------------------------------------------------- 
116          try: 
                 # return the cached object list if it has been loaded before
117              return self.__data['objects'] 
118          except KeyError: 
                 # not loaded yet: read it from the XML file, then return it
119              self.__load_obj_list() 
120              return self.__data['objects'] 
             # NOTE(review): not reachable - both branches above return or raise
121          return None 
122      #-------------------------------------------------------- 
123      _get_handler['objects'] = _get_obj_list  # registered as a plain function; the attribute access code calls it with the instance as its only argument
124      #-------------------------------------------------------- 
126          """Read list of image files from XML metadata file. 
127   
128          We assume the order of file names to correspond to the sequence of pages. 
129          - don't use self.__get_from_xml, because we want to 
130            scan lines sequentially here 
131          """ 
             # Result: self.__data['objects'] maps a running number (starting at 1)
             # to a dict with the keys 'file name' and 'index'. Returns 1 when at
             # least one object line was found, None otherwise (the dict then
             # stays empty).
132          self.__data['objects'] = {} 
             # name of the XML tag that encloses one file name
133          tag_name = self.__cfg.get(self.__group, "obj_tag") 
134          # now scan the xml file 
135          idx = 0 
136          for line in fileinput.input(self.__xml_file): 
137              content = self.__extract_xml_content(line, tag_name) 
                 # lines without a complete object tag are skipped
138              if content is None: 
139                  continue 
140              idx += 1 
141              tmp = {} 
                 # the tag content is taken as a path relative to the base directory
142              tmp['file name'] = os.path.abspath(os.path.join(self.__base_dir, content)) 
143              # this 'index' defines the order of objects in the document 
144              tmp['index'] = idx 
145              # we must use imaginary oid's since we are reading from a file, 
146              # this OID defines the object ID in the data store, this 
147              # has nothing to do with the semantic order of objects 
                 # NOTE(review): the key used below is the same running number that
                 # was just stored as tmp['index']; no separate OID is generated here.
148              self.__data['objects'][idx] = tmp 
149   
150          # cleanup 
             # NOTE(review): this close is skipped if the loop above raises;
             # fileinput keeps module-global state, so a later fileinput.input()
             # call may then be refused - confirm whether that matters here.
151          fileinput.close() 
152   
153          if idx == 0: 
154              _log.warning("no files found for import") 
155              return None 
156   
157          _log.debug("document data files to be processed: %s" % self.__data['objects']) 
158   
159          return 1         
160      #-------------------------------------------------------- 
161      # public methods 
162      #-------------------------------------------------------- 
167      #-------------------------------------------------------- 
169          # sanity 
             # Collects the text found between <aTag> and </aTag> in the XML file
             # and returns it as a list of strings (one per line touched), or None
             # when nothing was collected. Only the first such element is read.
             # NOTE(review): comparing against type('') accepts plain str only; a
             # Python 2 unicode tag name would be rejected here - confirm.
170          if not type(aTag) is type(''): 
171              _log.error("Argument aTag (" + str(aTag) + ") is not a string.") 
172              return None 
173   
             # the start tag is matched literally, so a start tag carrying
             # attributes is not recognized
174          TagStart = "<" + aTag + ">" 
175          TagEnd = "</" + aTag + ">" 
176   
177          _log.info("Retrieving " + TagStart + "content" + TagEnd + ".") 
178   
             # inTag is set to 1 once the start tag has been seen
179          inTag = 0 
180          content = [] 
181   
             # NOTE(review): callers in this module pass an anXMLfile argument, but
             # the visible code always reads self.__xml_file - confirm.
182          for line in fileinput.input(self.__xml_file): 
183              tmp = line 
184   
185              # this line starts a description 
186              if string.find(tmp, TagStart) != -1: 
187                  inTag = 1 
188                  # strip junk left of <tag> 
189                  (junk, good_stuff) = string.split (tmp, TagStart, 1) 
190                  _log.debug("Found tag start in line: junk='%s' content='%s'" % (junk, good_stuff)) 
                     # keep only the text after the start tag so that an end tag
                     # on the same line is handled by the check below
191                  tmp = good_stuff 
192   
193              # this line ends a description 
194              if string.find(tmp, TagEnd) != -1: 
195                  # only if tag start has been found already 
196                  if inTag == 1: 
197                      # strip junk right of </tag> 
198                      (good_stuff, junk) = string.split (tmp, TagEnd, 1) 
199                      _log.debug("Found tag end in line: junk='%s' content='%s'" % (junk, good_stuff)) 
200                      content.append(good_stuff) 
201                      # shortcut out of for loop 
202                      break 
203   
204              # might be in-tag data line or line with start tag only 
                 # such lines are appended unchanged
205              if inTag == 1: 
206                  content.append(tmp) 
207   
208          # cleanup 
             # needed after the early "break" as well, since fileinput keeps
             # module-global state until it is closed
209          fileinput.close() 
210   
211          # looped over all lines 
             # NOTE(review): content is also returned when the start tag was found
             # but the end tag never showed up - confirm that this is wanted.
212          if len(content) > 0: 
213              _log.debug("%s tag content successfully read: %s" % (TagStart, str(content))) 
214              return content 
215          else: 
216              return None 
217   
218      #-------------------------------------------------------- 
220          # is this a line we care about ? 
             # Returns the text between the start tag and </aTag> when both are on
             # this one line, otherwise None.
             # The search string is "<" + aTag without the closing ">", so start
             # tags with attributes match - and so does any tag whose name merely
             # begins with aTag.
221          start_tag_pos = string.find(aLine,'<%s' % aTag) 
222          if start_tag_pos == -1: 
223              return None 
224          # yes, so check for closing tag 
225          end_tag_pos = string.find(aLine, '</%s>' % aTag) 
226          if end_tag_pos == -1: 
227              # but we don't do multiline tags 
228              _log.error("Line [%s] is incomplete for tag [%s]. We don't do multiline tags here."  % (aLine, aTag)) 
229              return None 
230          # actually extract content 
             # content starts right after the first ">" found between the start
             # tag and the closing tag
             # NOTE(review): if no ">" is found in that range, find() returns -1
             # and the slice starts at the beginning of the line - confirm.
231          content_start = string.find(aLine,'>', start_tag_pos, end_tag_pos) + 1 
232          return aLine[content_start:end_tag_pos] 
233  #============================================================ 
234  # main 
235  #------------------------------------------------------------ 
236   
| Home | Trees | Indices | Help | 
 | 
|---|
| Generated by Epydoc 3.0.1 on Sat Feb 29 02:55:27 2020 | http://epydoc.sourceforge.net |