Interactive Parsing

version 3 by Jon Ingold

  • Home page
  • Beginning
  • Previous
  • Next



  • Section - Build the Dictionary metrics

    Include
    (-

    ! Storage sizing. MAX_WORDS and AV_WORD_LENGTH are not defined in this
    ! section (the sample definitions below are commented out); presumably
    ! they are supplied elsewhere, e.g. by the "maximum dictionary size" and
    ! "average word length" use options named in the error messages of
    ! CreateDictionaryMultiples -- TODO confirm.
    !Constant MAX_WORDS = 1000;
    !Constant AV_WORD_LENGTH = 6;
    ! Total number of per-character entries available across all words.
    Constant MAX_MULTIPLES = MAX_WORDS * AV_WORD_LENGTH;


    ! holds mapping from word -> position in multiples array
    ! (word array, indexed by dictionary word index). CreateDictionaryMultiples
    ! also writes one extra entry at index dictlen, so dictlen must be strictly
    ! less than MAX_WORDS for that write to stay inside this array.
    Array dictionary_word_pointer --> MAX_WORDS;
    ! byte array: for each word index, the first character byte of the word's
    ! printed text as left in test_bed by VM_PrintToBuffer
    Array dictionary_word_first_letter -> MAX_WORDS;



    ! holds multiples, number of which is determined by length of word
    ! (each entry starts at 1 and is multiplied by the values returned by
    ! PrimeFromCharacter for the characters within MATCH_RANGE of its position)
    Array dictionary_word_multiples --> MAX_MULTIPLES;

    ! 50-byte scratch buffer that each dictionary word is printed into;
    ! the printed length is read from test_bed-->0 and the text starts at
    ! byte offset WORDSIZE.
    Array test_bed -> 50;


    ! CreateDictionaryMultiples
    !
    ! Walks every dictionary word (indices 0 .. dictlen-1) and fills in:
    !   dictionary_word_pointer-->n       start of word n's entries in
    !                                     dictionary_word_multiples
    !   dictionary_word_first_letter->n   first character byte of word n
    !   dictionary_word_multiples-->...   one entry per character of the word;
    !                                     entry k is the product of
    !                                     PrimeFromCharacter(c) over every
    !                                     character c at a position within
    !                                     MATCH_RANGE of k
    ! A final sentinel dictionary_word_pointer-->dictlen is stored so that the
    ! length of the last word can be derived from adjacent pointers.
    !
    ! If the storage arrays are too small, an error message naming the use
    ! option to raise is printed and the routine returns early.
    !
    ! Locals: i, j      loop counters
    !         word_count index of the dictionary word being converted
    !         mult_pos  next free slot in dictionary_word_multiples
    !         avlen     running average word length * 100 (DEBUG only)
    !         prime     PrimeFromCharacter value of the current character
    [ CreateDictionaryMultiples
        i j word_count mult_pos avlen prime
        ;

        #ifdef DEBUG;
    ! print "Converting with space for ", MAX_WORDS, " words of average length ", AV_WORD_LENGTH, " and ", MAX_MULTIPLES, " multiples.^";
        #endif;

        ! dictionary_word_pointer has MAX_WORDS entries (0 .. MAX_WORDS-1) and
        ! the sentinel written after the main loop lands at index dictlen, so
        ! dictlen must be strictly less than MAX_WORDS. The suggested size is
        ! dictlen + 4 in both places so that following the printed command
        ! actually clears this check.
        if (dictlen >= MAX_WORDS)
        {
            "***Interactive Parsing Error***: Dictionary storage is too small. Please increase dictionary storage to at least ", dictlen + 4, " using the command:^^Use maximum dictionary size of at least ", dictlen + 4, ".^^";
        }

        mult_pos = 0;
        InitialiseMultiples();

        ! loop through dictionary words by index
        for (word_count = 0 : word_count < dictlen : word_count++)
        {
            ! store the start of this word's data
            dictionary_word_pointer --> word_count = mult_pos;

            ! get the dictionary word we're looking at: its text is printed
            ! into test_bed, with the length in test_bed-->0 and the
            ! characters starting at byte offset WORDSIZE
            VM_PrintToBuffer(test_bed, DICT_WORD_SIZE, WordFromIndex(word_count));

            dictionary_word_first_letter -> word_count = test_bed->WORDSIZE;

            #ifdef DEBUG;
                ! average word length stat (incremental mean, scaled by 100)
                avlen = avlen + ( (100 * test_bed-->0) - avlen) / (word_count + 1);
            #endif;

            ! a word that fills the whole dictionary resolution is handed to
            ! StoreLongerVersionOf (defined elsewhere)
            if (test_bed-->0 == DICT_WORD_SIZE)
                StoreLongerVersionOf(WordFromIndex(word_count));

            ! loop through the characters of the word. Each stores at WORDSIZE + i
            for (i = 0 : i < test_bed --> 0 : i++)
            {
                ! look the character up once; the value is reused for every
                ! neighbouring slot below
                prime = PrimeFromCharacter(test_bed->(i + WORDSIZE));

                if (prime == 0)
                {
                    ! no prime for this character (original note: "this word
                    ! had a comma in it"): reset the word's slots to 1 and
                    ! shrink it to a single neutral entry
    ! print "Dodgy word found: ", (address) WordFromIndex(word_count), ".^";
                    for (j = 0 : j < test_bed-->0 : j ++) dictionary_word_multiples-->(j + mult_pos) = 1;
                    test_bed-->0 = 1;
                    break;
                }

                ! loop through the neighbourhood of the character
                ! to "add" this character into adjacents "sums"
                for (j = -MATCH_RANGE : j <= MATCH_RANGE : j++)
                {
                    ! skip neighbours that fall outside this word
                    if (j + i >= 0 && j + i < test_bed-->0)
                    {
                        dictionary_word_multiples-->(mult_pos + j + i) = dictionary_word_multiples-->(mult_pos + j + i) * prime;
                    }
                }
            }

            #ifdef TEST_CONVERTER;

                print "Converted ", (address) WordFromIndex(word_count), " into ";
                for (i = mult_pos : i < mult_pos + test_bed-->0 : i++)
                    print dictionary_word_multiples-->i, " / ";
                print "^";
                !if (VM_KeyChar() == 'q') rfalse;

            #endif;

            ! move the position in multiples along because we've created new entries
            mult_pos = mult_pos + (test_bed-->0);

            ! keep 20 slots of headroom so the next word cannot write past the
            ! end of dictionary_word_multiples
            if (mult_pos + 20 > MAX_MULTIPLES)
            {
                ! word_count + 1 words have been stored so far; dividing by that
                ! (rather than word_count) also avoids a division by zero when
                ! the very first word triggers this error
                "***Interactive Parsing Error***: Dictionary prime storage is too small. Please increase your word length allowance using the command:^^Use average word length of at least ", (mult_pos / (word_count + 1)) + 2, ".^^";
            }

        }

        ! store last pointer, so we can calculate word lengths for the final word
        ! (index dictlen is in bounds thanks to the dictlen >= MAX_WORDS check above)
        dictionary_word_pointer --> dictlen = mult_pos;

        #ifdef DEBUG;
    ! print "Finished with ", mult_pos, " metrics over ", dictlen, " words of average length ", avlen, "/100.^";
        #endif;

    ];

    ! InitialiseMultiples
    !
    ! Sets every one of the MAX_MULTIPLES entries of dictionary_word_multiples
    ! to 1, the multiplicative identity, so that products can be accumulated
    ! into the entries afterwards.
    [ InitialiseMultiples slot;
        slot = 0;
        while (slot < MAX_MULTIPLES)
        {
            dictionary_word_multiples-->slot = 1;
            slot++;
        }
    ];


    -) after "Parser.i6t".