Logo Search packages:      
Sourcecode: jlex version File versions  Download package

void JLex::CSimplifyNfa::computeClasses ( CSpec  m_spec  )  [inline, private]

Compute minimum set of character classes needed to disambiguate edges. We optimistically assume that every character belongs to a single character class, and then incrementally split classes as we see edges that require discrimination between characters in the class. [CSA, 25-Jul-1999]

Definition at line 2356 of file Main.java.

References JLex::SparseBitSet::and(), JLex::SparseBitSet::clearAll(), JLex::SparseBitSet::get(), JLex::SparseBitSet::set(), and JLex::SparseBitSet::size.

    this.original_charset_size = m_spec.m_dtrans_ncols;
    this.ccls = new int[original_charset_size]; // initially all zero.

    int nextcls = 1;
    SparseBitSet clsA = new SparseBitSet(), clsB = new SparseBitSet();
    Hashtable h = new Hashtable();
    System.out.print("Working on character classes.");
    for (Enumeration e=m_spec.m_nfa_states.elements(); e.hasMoreElements(); ) {
      CNfa nfa = (CNfa) e.nextElement();
      if (nfa.m_edge==CNfa.EMPTY || nfa.m_edge==CNfa.EPSILON)
      continue; // no discriminatory information.
      clsA.clearAll(); clsB.clearAll();
      for (int i=0; i<ccls.length; i++)
      if (nfa.m_edge==i || // edge labeled with a character
          nfa.m_edge==CNfa.CCL && nfa.m_set.contains(i)) // set of characters
      // now figure out which character classes we need to split.
      clsA.and(clsB); // split the classes which show up on both sides of edge
      if (clsA.size()==0) continue; // nothing to do.
      // and split them.
      h.clear(); // h will map old to new class name
      for (int i=0; i<ccls.length; i++)
      if (clsA.get(ccls[i])) // a split class
        if (nfa.m_edge==i ||
            nfa.m_edge==CNfa.CCL && nfa.m_set.contains(i)) { // on A side
          Integer split = new Integer(ccls[i]);
          if (!h.containsKey(split))
            h.put(split, new Integer(nextcls++)); // make new class
          ccls[i] = ((Integer)h.get(split)).intValue();
    System.out.println("NFA has "+nextcls+" distinct character classes.");
    this.mapped_charset_size = nextcls;

Generated by  Doxygen 1.6.0   Back to index